rocksdb-0.23.0/.cargo_vcs_info.json0000644000000001360000000000100125450ustar { "git": { "sha1": "3525109490bcdf66565e5d22ff8a19fdd7240bba" }, "path_in_vcs": "" }rocksdb-0.23.0/.github/workflows/rust.yml000064400000000000000000000076501046102023000164620ustar 00000000000000name: RocksDB CI on: [ push, pull_request ] env: RUST_VERSION: 1.71.1 jobs: fmt: name: Rustfmt runs-on: ubuntu-latest steps: - name: Checkout sources uses: actions/checkout@v4 - name: Install rust uses: actions-rust-lang/setup-rust-toolchain@v1 with: toolchain: ${{ env.RUST_VERSION }} components: rustfmt - name: Run rustfmt run: cargo fmt --all -- --check doc-check: name: Rustdoc-check runs-on: ubuntu-latest steps: - name: Checkout sources uses: actions/checkout@v4 - name: Install rust uses: actions-rust-lang/setup-rust-toolchain@v1 with: toolchain: ${{ env.RUST_VERSION }} components: rust-docs - name: Run cargo rustdoc run: cargo rustdoc -- -D warnings clippy: name: Clippy runs-on: ubuntu-latest steps: - name: Checkout sources uses: actions/checkout@v4 - name: Install rust uses: actions-rust-lang/setup-rust-toolchain@v1 with: toolchain: ${{ env.RUST_VERSION }} components: clippy - name: Install dependencies run: sudo apt-get update && sudo apt-get install -y liburing-dev pkg-config - name: Set PKG_CONFIG_PATH run: echo "PKG_CONFIG_PATH=/usr/lib/x86_64-linux-gnu/pkgconfig" >> $GITHUB_ENV - name: Run clippy run: | cargo clippy --all-targets --features \ "jemalloc \ io-uring \ valgrind \ mt_static \ rtti \ multi-threaded-cf \ serde1" \ -- -D warnings audit: name: Security audit runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions-rust-lang/setup-rust-toolchain@v1 with: toolchain: stable - uses: actions-rust-lang/audit@v1 with: token: ${{ secrets.GITHUB_TOKEN }} test: name: ${{ matrix.build }} runs-on: ${{ matrix.os }} strategy: matrix: build: [ Linux, macOS, Windows ] include: - build: Linux os: ubuntu-latest - build: macOS os: macos-latest - build: Windows os: windows-latest steps: - name: Checkout sources uses: actions/checkout@v4 - name: Install rust uses: actions-rust-lang/setup-rust-toolchain@v1 with: toolchain: ${{ env.RUST_VERSION }} target: ${{ matrix.target }} - name: Remove msys64 # Workaround to resolve link error with C:\msys64\mingw64\bin\libclang.dll if: runner.os == 'Windows' run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse - name: Install dependencies if: runner.os == 'Windows' run: choco install llvm -y - name: Mark working directory as read-only # to ensure that the tests always use a temporary directory if: runner.os == 'Linux' run: | mkdir -p target touch Cargo.lock git submodule update --init --recursive chmod -R a-w . chmod -R a+w target Cargo.lock - name: Run rocksdb tests run: | cargo test --all cargo test --all --features multi-threaded-cf - name: Mark working directory as writable if: runner.os == 'Linux' run: chmod -R a+w . - name: Free disk space run: cargo clean - name: Mark working directory as read-only # to ensure that the tests always use a temporary directory if: runner.os == 'Linux' run: | mkdir -p target touch Cargo.lock chmod -R a-w . chmod -R a+w target Cargo.lock - name: Run rocksdb tests (jemalloc) if: runner.os != 'Windows' run: cargo test --all --features jemalloc - name: Mark working directory as writable if: runner.os == 'Linux' run: chmod -R a+w . rocksdb-0.23.0/.gitmodules000064400000000000000000000003331046102023000135110ustar 00000000000000[submodule "rocksdb_sys/snappy"] path = librocksdb-sys/snappy url = https://github.com/google/snappy.git [submodule "rocksdb_sys/rocksdb"] path = librocksdb-sys/rocksdb url = https://github.com/facebook/rocksdb.git rocksdb-0.23.0/CHANGELOG.md000064400000000000000000000532561046102023000131610ustar 00000000000000# Changelog ## 0.23.0 (2024-12-23) * Build status badge and other bits in README.md (jdanford) * Adds `crt_static` method (spector-9) * Fix `ptr::copy` requires both ptrs to be non-null (ruanpetterson) * Add portable feature for RocksDB build (sujayakar) * Update README.md with a new section for the portable feature (sujayakar) * Update to RocksDB 9.0.0 (Dirreke) * Add readme for mt_static feature (spector-9) * Make `BackupEngine` Send (widagdos) * Add linking `libatomic` command to `build.rs` to allow building (willemolding) * Revert portable feature as it doesn't do anything (zaidoon1) * Add delete_range to `OptimisticTransactionDB` (vadim-su) * Update rust toolchain to 1.70.0 (widagdos) * Bump snappy to 1.2.0 (aleksuss) * Document that `default` column family doesn't inherit open options of db (0xdeafbeef) * Toolchain: pin toolchain to rust 1.70.0 (cratelyn) * Add clippy MSRV and some clippy fixes (Congyuwang) * Port all value of `ReadTier` (w41ter) * Update src/db_options.rs (w41ter) * Add option `set_avoid_unnecessary_blocking_io` (w41ter) * Add option to enable autotuned ratelimiter (w41ter) * Add support for enabling blob cache (exabytes18) * Fix: android build in 32-bit devices (LucasXu0) * Support user defined timestamp in rust bindings (siyuan0322) * Bump lz4 1.10 (agourlay) * Properties for the `TransactionDB` (4TT1L4) * Improvements to user defined timestamp (larry0x) * Implement Sync for `{Bound}ColumnFamily` (jhpratt) * Use the provided system rocksdb prebuilt on FreeBSD (girlbossceo) * Deprecated Node version and outdated GitHub Actions (4TT1L4) * Fix cargo audit check in GitHub Actions CI (aleksuss) * RUSTSEC-2023-0020: `const-cstr` is Unmaintained (4TT1L4) * TransactionDB support in MemoryUsageBuilder (4TT1L4) * Bump `tikv-jemalloc-sys` to 0.6 (0xdeafbeef) * Improve statistics by auto gen enum Ticker & enum Histogram (rockeet) * Expose LRU cache options (athre0z) * Add `Env::from_raw` constructor (jgraettinger) * Fix unsoundness via impure `AsRef` (niklasf) * Fix two tests that want to write to the current working directory (mr-c) * Add missing supported BSD OSes (drizzt) * Fix column family creation race (stuhood) * Allow using static feature for the `bindgen` (Congyuwang) * Use `tempfile` instead of the current working directory (mr-c) * Update to RocksDB 9.7.4 (niklasf) * Add `with_capacity_bytes` to `WriteBatch` (0xdeafbeef) * ci: make most directories read-only before running the tests (mr-c) * Add `ROCKSDB_SCHED_GETCPU_PRESENT` for Linux build config (popcnt1) * Add `set_compaction_pri` to `Options` (0xdeafbeef) * Implement get_db_identity using rocksdb_get_db_identity (evanj) * Add `lto` feature to enable link-time optimization using `linker-plugin-lto` (0xdeafbeef) * Add `set_track_and_verify_wals_in_manifest` (evanj) * Fix some typos (DeVikingMark) * Fix multiple typos of different importance (crStiv) * Bump rust version for cargo audit job (aleksuss) * Update to RocksDB 9.9.3 (niklasf) * Allow to specify ttl per column family (0xdeafbeef) ## 0.22.0 (2024-02-13) * Free memory on writebatch index and avoid unnecessary clones (jkurian) * Update snappy to 1.1.10 (timsueberkrueb) * Prefer rocksdb_free to free for RocksDB memory (niklasf) * Expose flush_cfs_opt to flush multiple column families (lizhanhui) * Update to RocksDB 8.3.2 (niklasf) * Remove temporary boxed keys in batched_multi_get (axnsan12) * Convert properties to `&PropName` which can be converted at no cost to `&CStr` and `&str` (mina86) * Bump MSRV to 1.63.0 (mina86) * Add allow_ingest_behind ffi call for DB Options (siyuan0322) * Remove wrong outlive requirements for cache in docs (zheland) * Feat: support `column_family_metadata` and `column_family_metadata_cf` (ovr) * Update RocksDB to 8.5.3 (niklasf) * Expose ReadTier publicly (tinct-martini) * Update RocksDB to 8.6.7 (aleksuss) * Feat: expose `set_optimize_filters_for_memory` (zaidoon1) * Feat: expose compression option parallel_threads (zaidoon1) * Fix: add raw iterator validation before calling next method (aleksuss) * Fix typo in documentation (jazarine) * Feat: Expose `set_wal_compression_type` (ovr) * Update RocksDB to 8.8.1 (zaidoon1) * Feat: Expose `compact_on_deletion_collector_factory` (zaidoon1) * Fix bug in DBWALIterator that would return updates before the given sequence (schmidek) * Feat: Expose wait_for_compact (zaidoon1) * Feat: Expose `set_auto_readahead_size` (niklasf) * Update RocksDB to 8.9.1 (zaidoon1) * Feat: Expose `set_periodic_compaction_seconds` (zaidoon1) * Update hash commit of the rocksdb submodule to corresponding v8.9.1 (aleksuss) * Make CompactOptions Send and Sync (GodTamIt) * Update to RocksDB 8.10.0 (zaidoon1) * Add WriteBufferManager support (benoitmeriaux) * Update doc and parameter name for `optimize_for_point_lookup` (XiangpengHao) * Expose rocksdb cumulative statistics and histograms (AhmedSoliman) * Make FlushOptions Send and Sync (jansegre) * Export memory usage builder and MemoryUsage structs to users (AhmedSoliman) ## 0.21.0 (2023-05-09) * Add doc-check to CI with fix warnings in docs (YuraKotov) * Fix rustdoc::broken-intra-doc-links errors (YuraKotov) * Fix 32-bit ARM build (EyeOfPython) * Allow specifying checksum type (romanz) * Enable librocksdb-sys to be built by rustc_codegen_cranelift (ZePedroResende) * Update to RocksDB 8.0.0 (niklasf) * Block cache creation failure is not recoverable (niklasf) * Update iOS min version to 12 in the build script (mighty840) * Actually enable `io-uring` (niklasf) * Update to RocksDB 8.1.1 (niklasf) * Add `Cache::new_hyper_clock_cache()` (niklasf) * Retrieve Value from KeyMayExist if value found in Cache or Memory (Congyuwang) * Support for comparators as closures (pegesund) * Fix bug in DBWALIterator that would miss updates (Zagitta) ## 0.20.1 (2023-02-10) * Fix supporting MSRV 1.60.0 (aleksuss) ## 0.20.0 (2023-02-09) * Support RocksDB 7.x `BackupEngineOptions` (exabytes18) * Fix `int128` compatibility check (Dirreke) * Add `Options::load_latest` method to load the latest options from RockDB (Congyuwang) * Bump bindgen to 0.64.0 (cwlittle) * Bump rocksdb to 7.9.2 (kwek20) * Make `set_snapshot` method public (a14e) * Add `drop_cf` function to `TransactionDB` (bothra90) * Bump rocksdb to 7.8.3 (aleksuss) * Add doc for `set_cache_index_and_filter_blocks` (guerinoni) * Re-run `build.rs` if env vars change (drahnr) * Add `WriteBatch::data` method (w41ter) * Add `DB::open_cf_with_opts` method (w41ter) * Use lz4-sys crate rather then submodule (niklasf) * Make create_new_backup_flush generic (minshao) ## 0.19.0 (2022-08-05) * Add support for building with `io_uring` on Linux (parazyd) * Change iterators to return Result (mina86) * Support RocksDB transaction (yiyuanliu) * Avoid pulling in dependencies via static feature flag (niklasf) * Bump `rocksdb` to 7.4.4 (niklasf) * Bump `tikv-jemalloc-sys` to 0.5 (niklasf) * Update `set_use_fsync` comment (nazar-pc) * Introduce ReadOptions::set_iterate_range and PrefixRange (mina86) * Bump `rocksdb` to 7.4.3 (aleksuss) * Don’t hold onto ReadOptions.inner when iterating (mina86) * Bump `zstd-sys` from 1.6 to 2.0 (slightknack) * Enable a building on the iOS platform (dignifiedquire) * Add DBRawIteratorWithThreadMode::item method (mina86) * Use NonNull in DBRawIteratorWithThreadMode (mina86) * Tiny refactoring including fix for UB (niklasf) * Add batched version MultiGet API (yhchiang-sol) * Upgrade to rocksdb v7.3.1 (yhchiang-sol) * Consistently use `ffi_util::to_cpath` to convert `Path` to `CString` (mina86) * Convert properties to `&CStr` (mina86) * Allow passing `&CStr` arguments (mina86) * Fix memory leak when reading properties and avoid memory allocation (mina86) * Fix Windows UTF-8 build flag (rajivshah3) * Use more target features to build librocksdb-sys (niklasf) * Fix `bz_internal_error` symbol multiply defined (nanpuyue) * Bump rocksdb to 7.1.2 (dignifiedquire) * Add BlobDB options (dignifiedquire) * Add snapshot `PinnableSlice` based API (zheland) ## 0.18.0 (2022-02-03) * Add open_cf_descriptor methods for Secondary and ReadOnly AccessType (steviez) * Make Ribbon filters available (niklasf) * Change versioning scheme of `librocksdb-sys` crate (aleksuss) * Upgrade to RocksDB 6.28.2 (akrylysov) * Fix theoretical UB while transmuting Arc (niklasf) * Support configuring bottom-most compression level (mina86) * Add BlockBasedOptions::set_whole_key_filtering (niklasf) * Add constants for all supported properties (steviez) * Make CacheWrapper and EnvWrapper Send and Sync (aleksuss) * Replace mem::transmute with narrower conversions (niklasf) * Optimize non-overlapping copy in raw_data (niklasf) * Support multi_get_* methods (olegnn) * Optimize multi_get_cf_opt() to use size hint (niklasf) * Fix typo in set_background_purge_on_iterator_cleanup method (Congyuwang) * Use external compression crates where possible (Dr-Emann) * Update compression dependencies (akrylysov) * Add method for opening DB with ro access and cf descriptors (nikurt) * Support restoring from a specified backup (GoldenLeaves) * Add merge operands iterator (0xdeafbeef) * Derive serde::{Serialize, Deserialize} for configuration enums (thibault-martinez) * Add feature flag for runtime type information and metadata (jgraettinger) * Add set_info_log_level to control log verbosity (tkintscher) * Replace jemalloc-sys for tikv-jemalloc-sys (Rexagon) * Support UTF-8 file paths on Windows (rajivshah3) * Support building RocksDB with jemalloc (akrylysov) * Add rocksdb WAL flush api (duarten) * Update rocksdb to v6.22.1 (duarten) ## 0.17.0 (2021-07-22) * Fix `multi_get` method (mikhailOK) * Bump `librocksdb-sys` up to 6.19.3 (olegnn) * Add support for the cuckoo table format (rbost) * RocksDB is not compiled with SSE4 instructions anymore unless the corresponding features are enabled in rustc (mbargull) * Bump `librocksdb-sys` up to 6.20.3 (olegnn, akrylysov) * Add `DB::key_may_exist_cf_opt` method (stanislav-tkach) * Add `Options::set_zstd_max_train_bytes` method (stanislav-tkach) * Mark Cache and Env as Send and Sync (akrylysov) * Allow cloning the Cache and Env (duarten) * Make SSE inclusion conditional for target features (mbargull) * Use Self where possible (adamnemecek) * Don't leak dropped column families (ryoqun) ## 0.16.0 (2021-04-18) * Add `DB::cancel_all_background_work` method (stanislav-tkach) * Bump `librocksdb-sys` up to 6.13.3 (aleksuss) * Add `multi_get`, `multi_get_opt`, `multi_get_cf` and `multi_get_cf_opt` `DB` methods (stanislav-tkach) * Allow setting options on a ColumnFamily (romanz) * Fix logic related to merge operator settings (BoOTheFurious) * Export persist_period_sec option and background_threads (developerfred) * Remove unneeded bindgen features (Kixunil) * Add merge delete_callback omitted by mistake (zhangsoledad) * Bump `librocksdb-sys` up to 6.17.3 (ordian) * Remove the need for `&mut self` in `create_cf` and `drop_cf` (v2) (ryoqun) * Keep Cache and Env alive with Rc (acrrd) * Add `DB::open_cf_with_ttl` method (fdeantoni) ## 0.15.0 (2020-08-25) * Fix building rocksdb library on windows host (aleksuss) * Add github actions CI for windows build (aleksuss) * Update doc for `Options::set_compression_type` (wqfish) * Add clippy linter in CI (aleksuss) * Use DBPath for backup_restore test (wqfish) * Allow to build RocksDB with a different stdlib (calavera) * Add some doc-comments and tiny refactoring (aleksuss) * Expose `open_with_ttl`. (calavera) * Fixed build for `x86_64-linux-android` that doesn't support PCLMUL (vimmerru) * Add support for `SstFileWriter` and `DB::ingest_external_file` (methyl) * Add set_max_log_file_size and set_recycle_log_file_num to the Options (stanislav-tkach) * Export the `DEFAULT_COLUMN_FAMILY_NAME` constant (stanislav-tkach) * Fix slice transformers with no in_domain callback (nelhage) * Don't segfault on failed a merge operator (nelhage) * Adding read/write/db/compaction options (linxGnu) * Add dbpath and env options (linxGnu) * Add compaction filter factory API (unrealhoang) * Add link stdlib when linking prebuilt rocksdb (unrealhoang) * Support fetching sst files metadata, delete files in range, get mem usage (linxGnu) * Do not set rerun-if-changed=build.rs (xu-cheng) * Use pretty_assertions in tests (stanislav-tkach) * librocksdb-sys: update rocksdb to 6.11.4 (ordian) * Adding backup engine info (linxGnu) * Implement `Clone` trait for `Options` (stanislav-tkach) * Added `Send` implementation to `WriteBatch` (stanislav-tkach) * Extend github actions (stanislav-tkach) * Avoid copy for merge operator result using delete_callback (xuchen-plus) ## 0.14.0 (2020-04-22) * Updated lz4 to v1.9.2 (ordian) * BlockBasedOptions: expose `format_version`, `[index_]block_restart_interval` (ordian) * Improve `ffi_try` macro to make trailing comma optional (wqfish) * Add `set_ratelimiter` to the `Options` (PatrickNicholas) * Add `set_max_total_wal_size` to the `Options` (wqfish) * Simplify conversion on iterator item (zhangsoledad) * Add `flush_cf` method to the `DB` (wqfish) * Fix potential segfault when calling `next` on the `DBIterator` that is at the end of the range (wqfish) * Move to Rust 2018 (wqfish) * Fix doc for `WriteBatch::delete` (wqfish) * Bump `uuid` and `bindgen` dependencies (jonhoo) * Change APIs that never return error to not return `Result` (wqfish) * Fix lifetime parameter for iterators (wqfish) * Add a doc for `optimize_level_style_compaction` method (NikVolf) * Make `DBPath` use `tempfile` (jder) * Refactor `db.rs` and `lib.rs` into smaller pieces (jder) * Check if we're on a big endian system and act upon it (knarz) * Bump internal snappy version up to 1.1.8 (aleksuss) * Bump rocksdb version up to 6.7.3 (aleksuss) * Atomic flush option (mappum) * Make `set_iterate_upper_bound` method safe (wqfish) * Add support for data block hash index (dvdplm) * Add some extra config options (casualjim) * Add support for range delete APIs (wqfish) * Improve building `librocksdb-sys` with system libraries (basvandijk) * Add support for `open_for_read_only` APIs (wqfish) * Fix doc for `DBRawIterator::prev` and `next` methods (wqfish) * Add support for `open_as_secondary` APIs (calavera) ## 0.13.0 (2019-11-12) ### Changes * Added `ReadOptions::set_verify_checksums` and `Options::set_level_compaction_dynamic_level_bytes` methods (ordian) * Array of bytes has been changed for pinnable slice for get operations (nbdd0121) * Implemented `Sync` for `DBRawIterator` (nbdd0121) * Removed extra copy in DBRawIterator (nbdd0121) * Added `Options::max_dict_bytes` and `Options::zstd_max_training_bytes` methods(methyl) * Added Android support (rtsisyk) * Added lifetimes for `DBIterator` return types (ngotchac) * Bumped rocksdb up to 6.2.4 (aleksuss) * Disabled trait derivation for librocksdb-sys (EyeOfPython) * Added `DB::get_updates_since()` to iterate write batches in a given sequence (nlfiedler) * Added `ReadOptions::set_tailing()` to create a tailing iterator that continues to iterate over the database as new records are added (cjbradfield) * Changed column families storing (aleksuss) * Exposed the `status` method on iterators (rnarubin) ## 0.12.3 (2019-07-19) ### Changes * Enabled sse4.2/pclmul for accelerated crc32c (yjh0502) * Added `set_db_write_buffer_size` to the Options API (rnarubin) * Bumped RocksDB to 6.1.2 (lispy) * Added `Sync` and `Send` implementations to `Snapshot` (pavel-mukhanov) * Added `raw_iterator_cf_opt` to the DB API (rnarubin) * Added `DB::latest_sequence_number` method (vitvakatu) ## 0.12.2 (2019-05-03) ### Changes * Updated `compact_range_cf` to use generic arguments (romanz) * Removed allocations from `SliceTransform` implementation (ekmartin) * Bumped RocksDB to 5.18.3 (baptistejamin) * Implemented `delete_range` and `delete_range_cf` (baptistejamin) * Added contribution guide (rhurkes) * Cleaned up documentation for `ReadOptions.set_iterate_upper_bound` method (xiaobogaga) * Added `flush` and `flush_opt` operations (valeriansaliou) ## 0.12.1 (2019-03-27) ### Changes * Added `iterator_cf_opt` function to `DB` (elichai) * Added `set_allow_mmap_writes` and `set_allow_mmap_reads` functions to `Options` (aleksuss) ## 0.12.0 (2019-03-10) ### Changes * Added support for PlainTable factories (ekmartin) * Added ability to restore latest backup (rohitjoshi) * Added support for pinnable slices (xxuejie) * Added ability to get property values (ekmartin) * Simplified opening database when using non-default column families (iSynaptic) * `ColumnFamily`, `DBIterator` and `DBRawIterator` now have lifetime parameters to prevent using them after the `DB` has been dropped (iSynaptic) * Creating `DBIterator` and `DBRawIterator` now accept `ReadOptions` (iSynaptic) * All database operations that accepted byte slices, `&[u8]`, are now generic and accept anything that implements `AsRef<[u8]>` (iSynaptic) * Bumped RocksDB to version 5.17.2 (aleksuss) * Added `set_readahead_size` to `ReadOptions` (iSynaptic) * Updated main example in doc tests (mohanson) * Updated requirements documentation (jamesray1) * Implemented `AsRef<[u8]>` for `DBVector` (iSynaptic) ## 0.11.0 (2019-01-10) ### Announcements * This is the first release under the new [Maintainership](MAINTAINERSHIP.md) model. Three contributors have been selected to help maintain this library -- Oleksandr Anyshchenko ([@aleksuss](https://github.com/aleksuss)), Jordan Terrell ([@iSynaptic](https://github.com/iSynaptic)), and Ilya Bogdanov ([@vitvakatu](https://github.com/vitvakatu)). Many thanks to Tyler Neely ([@spacejam](https://github.com/spacejam)) for your support while taking on this new role. * A [gitter.im chat room](https://gitter.im/rust-rocksdb/Lobby) has been created. Although it's not guaranteed to be "staffed", it may help to collaborate on changes to `rust-rocksdb`. ### Changes * added LZ4, ZSTD, ZLIB, and BZIP2 compression support (iSynaptic) * added support for `Checkpoint` (aleksuss) * added support for `SliceTransform` (spacejam) * added `DBPath` struct to ensure test databases are cleaned up (ekmartin, iSynaptic) * fixed `rustfmt.toml` to work with newer `rustfmt` version (ekmartin, iSynaptic) * bindgen bumped up to 0.43 (s-panferov) * made `ColumnFamily` struct `Send` (Tpt) * made `DBIterator` struct `Send` (Elzor) * `create_cf` and `drop_cf` methods on `DB` now work with immutable references (aleksuss) * fixed crash in `test_column_family` test on macOS (aleksuss) * fixed/implemented CI builds for macOS and Windows (aleksuss, iSynaptic) * exposed `set_skip_stats_update_on_db_open` option (romanz) * exposed `keep_log_file_num` option (romanz) * added ability to retrieve `WriteBatch` serialized size (romanz) * added `set_options` method to `DB` to allow changing options without closing and re-opening the database (romanz) ## 0.10.1 (2018-07-17) * bump bindgen to 0.37 (ekmartin) * bump rocksdb to 5.14.2 (ekmartin) * add disable_cache to block-based options (ekmartin) * add set_wal_dir (ekmartin) * add set_memtable_prefix_bloom_ratio (ekmartin) * add MemtableFactory support (ekmartin) * add full_iterator (ekmartin) * allow index type specification on block options (ekmartin) * fix windows build (iSynaptic) ## 0.10.0 (2018-03-17) * Bump rocksdb to 5.11.3 (spacejam) ### New Features * Link with system rocksdb and snappy libs through envvars (ozkriff) ### Breaking Changes * Fix reverse iteration from a given key (ongardie) ## 0.9.1 (2018-02-10) ### New Features * SliceTransform support (spacejam) ## 0.9.0 (2018-02-10) ### New Features * Allow creating iterators over prefixes (glittershark) ### Breaking Changes * Open cfs with options (garyttierney, rrichardson) * Non-Associative merge ops (rrichardson) ## 0.8.3 (2018-02-10) * Bump rocksdb to 5.10.2 (ongardie) * Add Send marker to Options (iSynaptic) * Expose advise_random_on_open option (ongardie) ## 0.8.2 (2017-12-28) * Bump rocksdb to 5.7.1 (jquesnelle) ## 0.8.1 (2017-09-08) * Added list_cf (jeizsm) ## 0.8.0 (2017-09-02) * Removed set_disable_data_sync (glittershark) ## 0.7.2 (2017-09-02) * Bumped rocksdb to 5.6.2 (spacejam) ## 0.7.1 (2017-08-29) * Bumped rocksdb to 5.6.1 (vmx) ## 0.7 (2017-07-26) ### Breaking Changes * Bumped rocksdb to 5.4.6 (derekdreery) * Remove `use_direct_writes` now that `use_direct_io_for_flush_and_compaction` exists (derekdreery) ### New Features * ReadOptions is now public (rschmukler) * Implement Clone and AsRef for Error (daboross) * Support for `seek_for_prev` (kaedroho) * Support for DirectIO (kaedroho) ### Internal Cleanups * Fixed race condition in tests (debris) * Move tests to the default `tests` directory (vmx) ## 0.6.1 (2017-03-13) ### New Features * Support for raw iterator access (kaedroho) ## 0.6 (2016-12-18) ### Breaking Changes * Comparator function now returns an Ordering (alexreg) ### New Features * Compaction filter (tmccombs) * Support for backups (alexreg) 0.5 (2016-11-20) ### Breaking changes * No more Writable trait, as WriteBatch is not thread-safe as a DB (spacejam) * All imports of `rocksdb::rocksdb::*` should now be simply `rocksdb::*` (alexreg) * All errors changed to use a new `rocksdb::Error` type (kaedroho, alexreg) * Removed `Options.set_filter_deletes` as it was removed in RocksDB (kaedroho) * Renamed `add_merge_operator` to `set_merge_operator` and `add_comparator` to `set_comparator` (kaedroho) ### New Features * Windows support (development by jsgf and arkpar. ported by kaedroho) * The RocksDB library is now built at crate compile-time and statically linked with the resulting binary (development by jsgf and arkpar. ported by kaedroho) * Cleaned up and improved coverage and tests of the ffi module (alexreg) * Added many new methods to the `Options` type (development by ngaut, BusyJay, zhangjinpeng1987, siddontang and hhkbp2. ported by kaedroho) * Added `len` and `is_empty` methods to `WriteBatch` (development by siddontang. ported by kaedroho) * Added `path` mathod to `DB` (development by siddontang. ported by kaedroho) * `DB::open` now accepts any type that implements `Into` as the path argument (kaedroho) * `DB` now implements the `Debug` trait (kaedroho) * Add iterator_cf to snapshot (jezell) * Changelog started rocksdb-0.23.0/CONTRIBUTING.md000064400000000000000000000070131046102023000135670ustar 00000000000000# Contributing to rust-rocksdb Thank you for taking an interest in the project, and contributing to it - it's appreciated! There are several ways you can contribute: - [Bug Reports](#bug-reports) - [Feature Requests](#feature-requests) - [Documentation](#documentation) - [Discussion](#discussion) - [Pull Requests](#pull-requests) **Please note all contributors must adhere to the [code of conduct](code-of-conduct.md).** ## Bug Reports [bug-reports]: #bug-reports - **Ensure the bug has not already been reported** - this can be done with a quick search of the [existing open issues](https://github.com/rust-rocksdb/rust-rocksdb/issues?q=is%3Aissue+is%3Aopen+). - **Ensure the bug applies to the Rust wrapper, and not the underlying library** - bugs in the RocksDB library should be [reported upstream](https://github.com/facebook/rocksdb/issues). - When [creating an issue](https://github.com/rust-rocksdb/rust-rocksdb/issues/new) please try to: - **Use a clear and descriptive title** to identify the issue - **Provide enough context** to acurately summarize the issue. Not every issue will need detailed steps to recreate, example code, stack traces, etc. - use your own judgment on what information would be helpful to anyone working on the issue. It's easier for someone to skim over too much context, than stop and wait for a response when context is missing. ## Feature Requests [feature-requests]: #feature-requests Feature requests will primarily come in the form of ergonomics involving the Rust language, or in bringing the wrapper into parity with the library's API. Please create an issue with any relevant information. ## Documentation [documentation]: #documentation Much of the documentation should mirror or reference the library's [documentation](https://github.com/facebook/rocksdb/wiki). If the wrapper or its exposed functions are missing documentation or contain inaccurate information please submit a pull request. ## Discussion [discussion]: #discussion Discussion around design and development of the wrapper primarily occurs within issues and pull requests. Don't be afraid to participate if you have questions, concerns, insight, or advice. ## Pull Requests [pull-requests]: #pull-requests Pull requests are welcome, and when contributing code, the author agrees to do so under the project's [licensing](https://github.com/rust-rocksdb/rust-rocksdb/blob/master/LICENSE) - Apache 2.0 as of the time of this writing. The maintainers greatly appreciate PRs that follow open-source contribution best practices: 1. Fork this repository to your personal GitHub account. 1. Create a branch that includes your changes, **keep changes isolated and granular**. 1. Include any relevant documentation and/or tests. Write [documentation tests](https://doc.rust-lang.org/rustdoc/documentation-tests.html) when relevant. 1. Apply `cargo fmt` to ensure consistent formatting. 1. [Create a pull request](https://help.github.com/en/articles/about-pull-requests) against this repository. For pull requests that would benefit from discussion and review earlier in the development process, use a [Draft Pull Request](https://help.github.com/en/articles/about-pull-requests#draft-pull-requests). ## Additional Resources Some useful information for working with RocksDB in Rust: - [RocksDB library primary site](https://rocksdb.org) - [RocksDB library GitHub repository](https://github.com/facebook/rocksdb) - [RocksDB library documentation](https://github.com/facebook/rocksdb/wiki) - [Rust's Foreign Function Interface (ffi)](https://doc.rust-lang.org/nomicon/ffi.html) rocksdb-0.23.0/Cargo.toml0000644000000041340000000000100105450ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.71.1" name = "rocksdb" version = "0.23.0" authors = [ "Tyler Neely ", "David Greenberg ", ] exclude = [ ".gitignore", ".travis.yml", "deploy.sh", "test/**/*", ] description = "Rust wrapper for Facebook's RocksDB embeddable database" homepage = "https://github.com/rust-rocksdb/rust-rocksdb" readme = "README.md" keywords = [ "database", "embedded", "LSM-tree", "persistence", ] categories = ["database"] license = "Apache-2.0" repository = "https://github.com/rust-rocksdb/rust-rocksdb" [dependencies.libc] version = "0.2" [dependencies.librocksdb-sys] version = "0.17.1" features = ["static"] default-features = false [dependencies.serde] version = "1" features = ["derive"] optional = true [dev-dependencies.bincode] version = "1.3" [dev-dependencies.pretty_assertions] version = "1.0" [dev-dependencies.serde] version = "1" features = ["derive"] [dev-dependencies.tempfile] version = "3.1" [dev-dependencies.trybuild] version = "1" [features] bindgen-runtime = ["librocksdb-sys/bindgen-runtime"] bindgen-static = ["librocksdb-sys/bindgen-static"] bzip2 = ["librocksdb-sys/bzip2"] default = [ "snappy", "lz4", "zstd", "zlib", "bzip2", "bindgen-runtime", ] io-uring = ["librocksdb-sys/io-uring"] jemalloc = ["librocksdb-sys/jemalloc"] lto = ["librocksdb-sys/lto"] lz4 = ["librocksdb-sys/lz4"] mt_static = ["librocksdb-sys/mt_static"] multi-threaded-cf = [] rtti = ["librocksdb-sys/rtti"] serde1 = ["serde"] snappy = ["librocksdb-sys/snappy"] valgrind = [] zlib = ["librocksdb-sys/zlib"] zstd = ["librocksdb-sys/zstd"] rocksdb-0.23.0/Cargo.toml.orig000064400000000000000000000027741046102023000142360ustar 00000000000000[package] name = "rocksdb" description = "Rust wrapper for Facebook's RocksDB embeddable database" version = "0.23.0" edition = "2021" rust-version = "1.71.1" authors = [ "Tyler Neely ", "David Greenberg ", ] repository = "https://github.com/rust-rocksdb/rust-rocksdb" license = "Apache-2.0" categories = ["database"] keywords = ["database", "embedded", "LSM-tree", "persistence"] homepage = "https://github.com/rust-rocksdb/rust-rocksdb" exclude = [".gitignore", ".travis.yml", "deploy.sh", "test/**/*"] [workspace] members = ["librocksdb-sys"] [features] default = ["snappy", "lz4", "zstd", "zlib", "bzip2", "bindgen-runtime"] jemalloc = ["librocksdb-sys/jemalloc"] io-uring = ["librocksdb-sys/io-uring"] valgrind = [] mt_static = ["librocksdb-sys/mt_static"] snappy = ["librocksdb-sys/snappy"] lz4 = ["librocksdb-sys/lz4"] zstd = ["librocksdb-sys/zstd"] zlib = ["librocksdb-sys/zlib"] bzip2 = ["librocksdb-sys/bzip2"] rtti = ["librocksdb-sys/rtti"] multi-threaded-cf = [] serde1 = ["serde"] bindgen-runtime = ["librocksdb-sys/bindgen-runtime"] bindgen-static = ["librocksdb-sys/bindgen-static"] lto = ["librocksdb-sys/lto"] [dependencies] libc = "0.2" librocksdb-sys = { path = "librocksdb-sys", version = "0.17.1", default-features = false, features = [ "static", ] } serde = { version = "1", features = ["derive"], optional = true } [dev-dependencies] trybuild = "1" tempfile = "3.1" pretty_assertions = "1.0" bincode = "1.3" serde = { version = "1", features = ["derive"] } rocksdb-0.23.0/LICENSE000064400000000000000000000261361046102023000123520ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. rocksdb-0.23.0/MAINTAINERSHIP.md000064400000000000000000000060731046102023000140000ustar 00000000000000Maintainers agree to operate under this set of guidelines: #### Authority Maintainers are trusted to close issues, merge pull requests, and publish crates to cargo. #### Categories of Work 0. Minor * updating the changelog * requires no approval 1. Normal * librocksdb-sys updates * API tracking code in the rocksdb crate that does not change control flow * breaking changes due to removed functionality in rocksdb * require 1 approval from another maintainer. if no maintainer is able to be reached for 2 weeks, then progress may be made anyway * patch (and post-1.0, minor) releases to crates.io that contain only the above work * on each update of submodule rocksdb, run `make -C librocksdb-sys gen_statistics` 2. Major * breaking API changes that are not direct consequences of underlying rocksdb changes * refactoring, which should generally only be done for clearly functional reasons like to aid in the completion of a specific task * require consensus among all maintainers unless 2 weeks have gone by without full participation * if 2 weeks have gone by after seeking feedback, and at least one other maintainer has participated, and all participating maintainers are in agreement, then progress may be made anyway * if action is absolutely urgent, an organization owner may act as a tiebreaker if specifically requested to do so and they agree that making a controversial decision is worth the risk. This should hopefully never occur. If any maintainer thinks an issue is major, it is major. #### Changelog Maintenance * If you are the one who merges a PR that includes an externally-visible change, please describe the change in the changelog and merge it in. #### Releasing, Publishing * Releases adhere to [semver](https://semver.org/) * To cut a release, an issue should be opened for it and reach the required approval based on the above `Categories of Work` section above * When progress is possible, the issue may be closed and the proposer may publish to crates.io. This is controlled by those in the [crate publishers organization-level team](https://github.com/orgs/rust-rocksdb/teams/crate-publishers). * Releases should have an associated tag pushed to this repo. I recommend doing this after the publish to crates.io succeeds to prevent any mishaps around pushing a tag for something that can't actually be published. * The changelog serves as a sort of logical staging area for releases * If a breaking API change happens, and the changelog has not advanced to a new major version, we roll the changelog to a new major version and open an issue to release the previous patch (and post-1.0, minor) version. * Before rolling to a new major version, it would be nice to release a non-breaking point release to let current users silently take advantage of any improvements #### Becoming a Maintainer * If you have a history of participation in this repo, agree to these rules, and wish to take on maintainership responsibilities, you may open an issue. If an owner agrees, they will add you to the maintainer group and the crate publishers team. rocksdb-0.23.0/README.md000064400000000000000000000077421046102023000126260ustar 00000000000000# rust-rocksdb [![RocksDB build](https://github.com/rust-rocksdb/rust-rocksdb/actions/workflows/rust.yml/badge.svg?branch=master)](https://github.com/rust-rocksdb/rust-rocksdb/actions/workflows/rust.yml) [![crates.io](https://img.shields.io/crates/v/rocksdb.svg)](https://crates.io/crates/rocksdb) [![documentation](https://docs.rs/rocksdb/badge.svg)](https://docs.rs/rocksdb) [![license](https://img.shields.io/crates/l/rocksdb.svg)](https://github.com/rust-rocksdb/rust-rocksdb/blob/master/LICENSE) [![Gitter chat](https://badges.gitter.im/rust-rocksdb/gitter.svg)](https://gitter.im/rust-rocksdb/lobby) ![rust 1.71.1 required](https://img.shields.io/badge/rust-1.71.1-blue.svg?label=MSRV) ![GitHub commits (since latest release)](https://img.shields.io/github/commits-since/rust-rocksdb/rust-rocksdb/latest.svg) ## Requirements - Clang and LLVM ## Contributing Feedback and pull requests welcome! If a particular feature of RocksDB is important to you, please let me know by opening an issue, and I'll prioritize it. ## Usage This binding is statically linked with a specific version of RocksDB. If you want to build it yourself, make sure you've also cloned the RocksDB and compression submodules: ```shell git submodule update --init --recursive ``` ## Compression Support By default, support for [Snappy](https://github.com/google/snappy), [LZ4](https://github.com/lz4/lz4), [Zstd](https://github.com/facebook/zstd), [Zlib](https://zlib.net), and [Bzip2](http://www.bzip.org) compression is enabled through crate features. If support for all of these compression algorithms is not needed, default features can be disabled and specific compression algorithms can be enabled. For example, to enable only LZ4 compression support, make these changes to your Cargo.toml: ```toml [dependencies.rocksdb] default-features = false features = ["lz4"] ``` ## Multithreaded ColumnFamily alternation RocksDB allows column families to be created and dropped from multiple threads concurrently, but this crate doesn't allow it by default for compatibility. If you need to modify column families concurrently, enable the crate feature `multi-threaded-cf`, which makes this binding's data structures use `RwLock` by default. Alternatively, you can directly create `DBWithThreadMode` without enabling the crate feature. ## Switch between /MT or /MD run time library (Only for Windows) The feature `mt_static` will request the library to be built with [/MT](https://learn.microsoft.com/en-us/cpp/build/reference/md-mt-ld-use-run-time-library?view=msvc-170) flag, which results in library using the static version of the run-time library. *This can be useful in case there's a conflict in the dependency tree between different run-time versions.* ## Switch between static and dynamic linking for bindgen (features `bindgen-static` and `bindgen-runtime`) The feature `bindgen-runtime` will enable the `runtime` feature of bindgen, which dynamically links to libclang. This is suitable for most platforms, and is enabled by default. The feature `bindgen-static` will enable the `static` feature of bindgen, which statically links to libclang. This is suitable for musllinux platforms, such as Alpine linux. To build on Alpine linux for example, make these changes to your Cargo.toml: ```toml [dependencies.rocksdb] default-features = false features = ["bindgen-static", "snappy", "lz4", "zstd", "zlib", "bzip2"] ``` Notice that `runtime` and `static` features are mutually exclusive, and won't compile if both enabled. ## LTO Enable the `lto` feature to enable link-time optimization. It will compile rocksdb with `-flto` flag. This feature is disabled by default. > [!IMPORTANT] > You must use clang as `CC`. Eg. `CC=/usr/bin/clang CXX=/usr/bin/clang++`. Clang llvm version must be the same as the one used by rust compiler. > On the rust side you should use `RUSTFLAGS="-Clinker-plugin-lto -Clinker=clang -Clink-arg=-fuse-ld=lld"`. Check the [Rust documentation](https://doc.rust-lang.org/rustc/linker-plugin-lto.html) for more information. rocksdb-0.23.0/code-of-conduct.md000064400000000000000000000064211046102023000146330ustar 00000000000000# Contributor Covenant Code of Conduct ## Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. ## Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at t@jujit.su. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see https://www.contributor-covenant.org/faq rocksdb-0.23.0/rust-toolchain.toml000064400000000000000000000000371046102023000152050ustar 00000000000000[toolchain] channel = "1.71.1" rocksdb-0.23.0/src/backup.rs000064400000000000000000000241331046102023000137420ustar 00000000000000// Copyright 2016 Alex Regueiro // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // use crate::env::Env; use crate::{db::DBInner, ffi, ffi_util::to_cpath, DBCommon, Error, ThreadMode}; use libc::c_uchar; use std::ffi::CString; use std::path::Path; /// Represents information of a backup including timestamp of the backup /// and the size (please note that sum of all backups' sizes is bigger than the actual /// size of the backup directory because some data is shared by multiple backups). /// Backups are identified by their always-increasing IDs. pub struct BackupEngineInfo { /// Timestamp of the backup pub timestamp: i64, /// ID of the backup pub backup_id: u32, /// Size of the backup pub size: u64, /// Number of files related to the backup pub num_files: u32, } pub struct BackupEngine { inner: *mut ffi::rocksdb_backup_engine_t, _outlive: Env, } pub struct BackupEngineOptions { inner: *mut ffi::rocksdb_backup_engine_options_t, } pub struct RestoreOptions { inner: *mut ffi::rocksdb_restore_options_t, } // BackupEngine is a simple pointer wrapper, so it's safe to send to another thread // since the underlying RocksDB backup engine is thread-safe. unsafe impl Send for BackupEngine {} impl BackupEngine { /// Open a backup engine with the specified options and RocksDB Env. pub fn open(opts: &BackupEngineOptions, env: &Env) -> Result { let be: *mut ffi::rocksdb_backup_engine_t; unsafe { be = ffi_try!(ffi::rocksdb_backup_engine_open_opts( opts.inner, env.0.inner )); } if be.is_null() { return Err(Error::new("Could not initialize backup engine.".to_owned())); } Ok(Self { inner: be, _outlive: env.clone(), }) } /// Captures the state of the database in the latest backup. /// /// Note: no flush before backup is performed. User might want to /// use `create_new_backup_flush` instead. pub fn create_new_backup( &mut self, db: &DBCommon, ) -> Result<(), Error> { self.create_new_backup_flush(db, false) } /// Captures the state of the database in the latest backup. /// /// Set flush_before_backup=true to avoid losing unflushed key/value /// pairs from the memtable. pub fn create_new_backup_flush( &mut self, db: &DBCommon, flush_before_backup: bool, ) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_backup_engine_create_new_backup_flush( self.inner, db.inner.inner(), c_uchar::from(flush_before_backup), )); Ok(()) } } pub fn purge_old_backups(&mut self, num_backups_to_keep: usize) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_backup_engine_purge_old_backups( self.inner, num_backups_to_keep as u32, )); Ok(()) } } /// Restore from the latest backup /// /// # Arguments /// /// * `db_dir` - A path to the database directory /// * `wal_dir` - A path to the wal directory /// * `opts` - Restore options /// /// # Examples /// /// ```ignore /// use rocksdb::backup::{BackupEngine, BackupEngineOptions}; /// let backup_opts = BackupEngineOptions::default(); /// let mut backup_engine = BackupEngine::open(&backup_opts, &backup_path).unwrap(); /// let mut restore_option = rocksdb::backup::RestoreOptions::default(); /// restore_option.set_keep_log_files(true); /// true to keep log files /// if let Err(e) = backup_engine.restore_from_latest_backup(&db_path, &wal_dir, &restore_option) { /// error!("Failed to restore from the backup. Error:{:?}", e); /// return Err(e.to_string()); /// } /// ``` pub fn restore_from_latest_backup, W: AsRef>( &mut self, db_dir: D, wal_dir: W, opts: &RestoreOptions, ) -> Result<(), Error> { let c_db_dir = to_cpath(db_dir)?; let c_wal_dir = to_cpath(wal_dir)?; unsafe { ffi_try!(ffi::rocksdb_backup_engine_restore_db_from_latest_backup( self.inner, c_db_dir.as_ptr(), c_wal_dir.as_ptr(), opts.inner, )); } Ok(()) } /// Restore from a specified backup /// /// The specified backup id should be passed in as an additional parameter. pub fn restore_from_backup, W: AsRef>( &mut self, db_dir: D, wal_dir: W, opts: &RestoreOptions, backup_id: u32, ) -> Result<(), Error> { let c_db_dir = to_cpath(db_dir)?; let c_wal_dir = to_cpath(wal_dir)?; unsafe { ffi_try!(ffi::rocksdb_backup_engine_restore_db_from_backup( self.inner, c_db_dir.as_ptr(), c_wal_dir.as_ptr(), opts.inner, backup_id, )); } Ok(()) } /// Checks that each file exists and that the size of the file matches our /// expectations. it does not check file checksum. /// /// If this BackupEngine created the backup, it compares the files' current /// sizes against the number of bytes written to them during creation. /// Otherwise, it compares the files' current sizes against their sizes when /// the BackupEngine was opened. pub fn verify_backup(&self, backup_id: u32) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_backup_engine_verify_backup( self.inner, backup_id, )); } Ok(()) } /// Get a list of all backups together with information on timestamp of the backup /// and the size (please note that sum of all backups' sizes is bigger than the actual /// size of the backup directory because some data is shared by multiple backups). /// Backups are identified by their always-increasing IDs. /// /// You can perform this function safely, even with other BackupEngine performing /// backups on the same directory pub fn get_backup_info(&self) -> Vec { unsafe { let i = ffi::rocksdb_backup_engine_get_backup_info(self.inner); let n = ffi::rocksdb_backup_engine_info_count(i); let mut info = Vec::with_capacity(n as usize); for index in 0..n { info.push(BackupEngineInfo { timestamp: ffi::rocksdb_backup_engine_info_timestamp(i, index), backup_id: ffi::rocksdb_backup_engine_info_backup_id(i, index), size: ffi::rocksdb_backup_engine_info_size(i, index), num_files: ffi::rocksdb_backup_engine_info_number_files(i, index), }); } // destroy backup info object ffi::rocksdb_backup_engine_info_destroy(i); info } } } impl BackupEngineOptions { /// Initializes `BackupEngineOptions` with the directory to be used for storing/accessing the /// backup files. pub fn new>(backup_dir: P) -> Result { let backup_dir = backup_dir.as_ref(); let c_backup_dir = CString::new(backup_dir.to_string_lossy().as_bytes()).map_err(|_| { Error::new( "Failed to convert backup_dir to CString \ when constructing BackupEngineOptions" .to_owned(), ) })?; unsafe { let opts = ffi::rocksdb_backup_engine_options_create(c_backup_dir.as_ptr()); assert!(!opts.is_null(), "Could not create RocksDB backup options"); Ok(Self { inner: opts }) } } /// Sets the number of operations (such as file copies or file checksums) that `RocksDB` may /// perform in parallel when executing a backup or restore. /// /// Default: 1 pub fn set_max_background_operations(&mut self, max_background_operations: i32) { unsafe { ffi::rocksdb_backup_engine_options_set_max_background_operations( self.inner, max_background_operations, ); } } } impl RestoreOptions { /// Sets `keep_log_files`. If true, restore won't overwrite the existing log files in wal_dir. /// It will also move all log files from archive directory to wal_dir. Use this option in /// combination with BackupEngineOptions::backup_log_files = false for persisting in-memory /// databases. /// /// Default: false pub fn set_keep_log_files(&mut self, keep_log_files: bool) { unsafe { ffi::rocksdb_restore_options_set_keep_log_files(self.inner, i32::from(keep_log_files)); } } } impl Default for RestoreOptions { fn default() -> Self { unsafe { let opts = ffi::rocksdb_restore_options_create(); assert!(!opts.is_null(), "Could not create RocksDB restore options"); Self { inner: opts } } } } impl Drop for BackupEngine { fn drop(&mut self) { unsafe { ffi::rocksdb_backup_engine_close(self.inner); } } } impl Drop for BackupEngineOptions { fn drop(&mut self) { unsafe { ffi::rocksdb_backup_engine_options_destroy(self.inner); } } } impl Drop for RestoreOptions { fn drop(&mut self) { unsafe { ffi::rocksdb_restore_options_destroy(self.inner); } } } rocksdb-0.23.0/src/checkpoint.rs000064400000000000000000000046341046102023000146300ustar 00000000000000// Copyright 2018 Eugene P. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // //! Implementation of bindings to RocksDB Checkpoint[1] API //! //! [1]: https://github.com/facebook/rocksdb/wiki/Checkpoints use crate::{db::DBInner, ffi, ffi_util::to_cpath, DBCommon, Error, ThreadMode}; use std::{marker::PhantomData, path::Path}; /// Undocumented parameter for `ffi::rocksdb_checkpoint_create` function. Zero by default. const LOG_SIZE_FOR_FLUSH: u64 = 0_u64; /// Database's checkpoint object. /// Used to create checkpoints of the specified DB from time to time. pub struct Checkpoint<'db> { inner: *mut ffi::rocksdb_checkpoint_t, _db: PhantomData<&'db ()>, } impl<'db> Checkpoint<'db> { /// Creates new checkpoint object for specific DB. /// /// Does not actually produce checkpoints, call `.create_checkpoint()` method to produce /// a DB checkpoint. pub fn new(db: &'db DBCommon) -> Result { let checkpoint: *mut ffi::rocksdb_checkpoint_t; unsafe { checkpoint = ffi_try!(ffi::rocksdb_checkpoint_object_create(db.inner.inner())); } if checkpoint.is_null() { return Err(Error::new("Could not create checkpoint object.".to_owned())); } Ok(Self { inner: checkpoint, _db: PhantomData, }) } /// Creates new physical DB checkpoint in directory specified by `path`. pub fn create_checkpoint>(&self, path: P) -> Result<(), Error> { let cpath = to_cpath(path)?; unsafe { ffi_try!(ffi::rocksdb_checkpoint_create( self.inner, cpath.as_ptr(), LOG_SIZE_FOR_FLUSH, )); } Ok(()) } } impl<'db> Drop for Checkpoint<'db> { fn drop(&mut self) { unsafe { ffi::rocksdb_checkpoint_object_destroy(self.inner); } } } rocksdb-0.23.0/src/column_family.rs000064400000000000000000000151311046102023000153310ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. use crate::{db::MultiThreaded, ffi, Options}; use std::sync::Arc; use std::time::Duration; /// The name of the default column family. /// /// The column family with this name is created implicitly whenever column /// families are used. pub const DEFAULT_COLUMN_FAMILY_NAME: &str = "default"; /// A descriptor for a RocksDB column family. /// /// A description of the column family, containing the name and `Options`. pub struct ColumnFamilyDescriptor { pub(crate) name: String, pub(crate) options: Options, pub(crate) ttl: ColumnFamilyTtl, } impl ColumnFamilyDescriptor { /// Create a new column family descriptor with the specified name and options. /// *WARNING*: /// Will use [`ColumnFamilyTtl::SameAsDb`] as ttl. pub fn new(name: S, options: Options) -> Self where S: Into, { Self { name: name.into(), options, ttl: ColumnFamilyTtl::SameAsDb, } } /// Create a new column family descriptor with the specified name, options, and ttl. /// *WARNING*: /// The ttl is applied only when DB is opened with [`crate::db::DB::open_with_ttl()`]. pub fn new_with_ttl(name: S, options: Options, ttl: ColumnFamilyTtl) -> Self where S: Into, { Self { name: name.into(), options, ttl, } } /// Sets ttl for the column family. It's applied only when DB is opened with /// [`crate::db::DB::open_with_ttl()`]. Changing ttl after DB is opened has no effect. pub fn set_ttl(&mut self, ttl: ColumnFamilyTtl) { self.ttl = ttl; } /// Get the name of the ColumnFamilyDescriptor. pub fn name(&self) -> &str { &self.name } pub fn ttl(&self) -> ColumnFamilyTtl { self.ttl } } #[derive(Debug, Clone, Copy, Default)] /// Specifies the TTL behavior for a column family. /// pub enum ColumnFamilyTtl { /// Will internally set TTL to -1 (disabled) #[default] Disabled, /// Will set ttl to the specified duration Duration(Duration), /// Will use ttl specified at db open time SameAsDb, } /// An opaque type used to represent a column family. Returned from some functions, and used /// in others pub struct ColumnFamily { pub(crate) inner: *mut ffi::rocksdb_column_family_handle_t, } /// A specialized opaque type used to represent a column family by the [`MultiThreaded`] /// mode. Clone (and Copy) is derived to behave like `&ColumnFamily` (this is used for /// single-threaded mode). `Clone`/`Copy` is safe because this lifetime is bound to DB like /// iterators/snapshots. On top of it, this is as cheap and small as `&ColumnFamily` because /// this only has a single pointer-wide field. pub struct BoundColumnFamily<'a> { pub(crate) inner: *mut ffi::rocksdb_column_family_handle_t, pub(crate) multi_threaded_cfs: std::marker::PhantomData<&'a MultiThreaded>, } // internal struct which isn't exposed to public api. // but its memory will be exposed after transmute()-ing to BoundColumnFamily. // ColumnFamily's lifetime should be bound to DB. But, db holds cfs and cfs can't easily // self-reference DB as its lifetime due to rust's type system pub(crate) struct UnboundColumnFamily { pub(crate) inner: *mut ffi::rocksdb_column_family_handle_t, } impl UnboundColumnFamily { pub(crate) fn bound_column_family<'a>(self: Arc) -> Arc> { // SAFETY: the new BoundColumnFamily here just adding lifetime, // so that column family handle won't outlive db. unsafe { Arc::from_raw(Arc::into_raw(self).cast()) } } } fn destroy_handle(handle: *mut ffi::rocksdb_column_family_handle_t) { // SAFETY: This should be called only from various Drop::drop(), strictly keeping a 1-to-1 // ownership to avoid double invocation to the rocksdb function with same handle. unsafe { ffi::rocksdb_column_family_handle_destroy(handle); } } impl Drop for ColumnFamily { fn drop(&mut self) { destroy_handle(self.inner); } } // these behaviors must be identical between BoundColumnFamily and UnboundColumnFamily // due to the unsafe transmute() in bound_column_family()! impl<'a> Drop for BoundColumnFamily<'a> { fn drop(&mut self) { destroy_handle(self.inner); } } impl Drop for UnboundColumnFamily { fn drop(&mut self) { destroy_handle(self.inner); } } /// Handy type alias to hide actual type difference to reference [`ColumnFamily`] /// depending on the `multi-threaded-cf` crate feature. #[cfg(not(feature = "multi-threaded-cf"))] pub type ColumnFamilyRef<'a> = &'a ColumnFamily; #[cfg(feature = "multi-threaded-cf")] pub type ColumnFamilyRef<'a> = Arc>; /// Utility trait to accept both supported references to `ColumnFamily` /// (`&ColumnFamily` and `BoundColumnFamily`) pub trait AsColumnFamilyRef { fn inner(&self) -> *mut ffi::rocksdb_column_family_handle_t; } impl AsColumnFamilyRef for ColumnFamily { fn inner(&self) -> *mut ffi::rocksdb_column_family_handle_t { self.inner } } impl<'a> AsColumnFamilyRef for &'a ColumnFamily { fn inner(&self) -> *mut ffi::rocksdb_column_family_handle_t { self.inner } } // Only implement for Arc-ed BoundColumnFamily as this tightly coupled and // implementation detail, considering use of std::mem::transmute. BoundColumnFamily // isn't expected to be used as naked. // Also, ColumnFamilyRef might not be Arc> depending crate // feature flags so, we can't use the type alias here. impl<'a> AsColumnFamilyRef for Arc> { fn inner(&self) -> *mut ffi::rocksdb_column_family_handle_t { self.inner } } unsafe impl Send for ColumnFamily {} unsafe impl Sync for ColumnFamily {} unsafe impl Send for UnboundColumnFamily {} unsafe impl Sync for UnboundColumnFamily {} unsafe impl<'a> Send for BoundColumnFamily<'a> {} unsafe impl<'a> Sync for BoundColumnFamily<'a> {} rocksdb-0.23.0/src/compaction_filter.rs000064400000000000000000000141561046102023000162020ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // use libc::{c_char, c_int, c_uchar, c_void, size_t}; use std::ffi::{CStr, CString}; use std::slice; /// Decision about how to handle compacting an object /// /// This is returned by a compaction filter callback. Depending /// on the value, the object may be kept, removed, or changed /// in the database during a compaction. pub enum Decision { /// Keep the old value Keep, /// Remove the object from the database Remove, /// Change the value for the key Change(&'static [u8]), } /// CompactionFilter allows an application to modify/delete a key-value at /// the time of compaction. pub trait CompactionFilter { /// The compaction process invokes this /// method for kv that is being compacted. The application can inspect /// the existing value of the key and make decision based on it. /// /// Key-Values that are results of merge operation during compaction are not /// passed into this function. Currently, when you have a mix of Put()s and /// Merge()s on a same key, we only guarantee to process the merge operands /// through the compaction filters. Put()s might be processed, or might not. /// /// When the value is to be preserved, the application has the option /// to modify the existing_value and pass it back through new_value. /// value_changed needs to be set to true in this case. /// /// Note that RocksDB snapshots (i.e. call GetSnapshot() API on a /// DB* object) will not guarantee to preserve the state of the DB with /// CompactionFilter. Data seen from a snapshot might disappear after a /// compaction finishes. If you use snapshots, think twice about whether you /// want to use compaction filter and whether you are using it in a safe way. /// /// If the CompactionFilter was created by a factory, then it will only ever /// be used by a single thread that is doing the compaction run, and this /// call does not need to be thread-safe. However, multiple filters may be /// in existence and operating concurrently. fn filter(&mut self, level: u32, key: &[u8], value: &[u8]) -> Decision; /// Returns a name that identifies this compaction filter. /// The name will be printed to LOG file on start up for diagnosis. fn name(&self) -> &CStr; } /// Function to filter compaction with. /// /// This function takes the level of compaction, the key, and the existing value /// and returns the decision about how to handle the Key-Value pair. /// /// See [Options::set_compaction_filter][set_compaction_filter] for more details /// /// [set_compaction_filter]: ../struct.Options.html#method.set_compaction_filter pub trait CompactionFilterFn: FnMut(u32, &[u8], &[u8]) -> Decision {} impl CompactionFilterFn for F where F: FnMut(u32, &[u8], &[u8]) -> Decision + Send + 'static {} pub struct CompactionFilterCallback where F: CompactionFilterFn, { pub name: CString, pub filter_fn: F, } impl CompactionFilter for CompactionFilterCallback where F: CompactionFilterFn, { fn name(&self) -> &CStr { self.name.as_c_str() } fn filter(&mut self, level: u32, key: &[u8], value: &[u8]) -> Decision { (self.filter_fn)(level, key, value) } } pub unsafe extern "C" fn destructor_callback(raw_cb: *mut c_void) where F: CompactionFilter, { drop(Box::from_raw(raw_cb as *mut F)); } pub unsafe extern "C" fn name_callback(raw_cb: *mut c_void) -> *const c_char where F: CompactionFilter, { let cb = &*(raw_cb as *mut F); cb.name().as_ptr() } pub unsafe extern "C" fn filter_callback( raw_cb: *mut c_void, level: c_int, raw_key: *const c_char, key_length: size_t, existing_value: *const c_char, value_length: size_t, new_value: *mut *mut c_char, new_value_length: *mut size_t, value_changed: *mut c_uchar, ) -> c_uchar where F: CompactionFilter, { use self::Decision::{Change, Keep, Remove}; let cb = &mut *(raw_cb as *mut F); let key = slice::from_raw_parts(raw_key as *const u8, key_length); let oldval = slice::from_raw_parts(existing_value as *const u8, value_length); let result = cb.filter(level as u32, key, oldval); match result { Keep => 0, Remove => 1, Change(newval) => { *new_value = newval.as_ptr() as *mut c_char; *new_value_length = newval.len() as size_t; *value_changed = 1_u8; 0 } } } #[cfg(test)] #[allow(unused_variables)] fn test_filter(level: u32, key: &[u8], value: &[u8]) -> Decision { use self::Decision::{Change, Keep, Remove}; match key.first() { Some(&b'_') => Remove, Some(&b'%') => Change(b"secret"), _ => Keep, } } #[test] fn compaction_filter_test() { use crate::{Options, DB}; let tempdir = tempfile::Builder::new() .prefix("_rust_rocksdb_filter_test") .tempdir() .expect("Failed to create temporary path for the _rust_rocksdb_filter_test"); let path = tempdir.path(); let mut opts = Options::default(); opts.create_if_missing(true); opts.set_compaction_filter("test", test_filter); { let db = DB::open(&opts, path).unwrap(); let _r = db.put(b"k1", b"a"); let _r = db.put(b"_k", b"b"); let _r = db.put(b"%k", b"c"); db.compact_range(None::<&[u8]>, None::<&[u8]>); assert_eq!(&*db.get(b"k1").unwrap().unwrap(), b"a"); assert!(db.get(b"_k").unwrap().is_none()); assert_eq!(&*db.get(b"%k").unwrap().unwrap(), b"secret"); } let result = DB::destroy(&opts, path); assert!(result.is_ok()); } rocksdb-0.23.0/src/compaction_filter_factory.rs000064400000000000000000000107131046102023000177240ustar 00000000000000use std::ffi::CStr; use libc::{self, c_char, c_void}; use crate::{ compaction_filter::{self, CompactionFilter}, ffi, }; /// Each compaction will create a new CompactionFilter allowing the /// application to know about different compactions. /// /// See [compaction_filter::CompactionFilter][CompactionFilter] and /// [Options::set_compaction_filter_factory][set_compaction_filter_factory] /// for more details /// /// [CompactionFilter]: ../compaction_filter/trait.CompactionFilter.html /// [set_compaction_filter_factory]: ../struct.Options.html#method.set_compaction_filter_factory pub trait CompactionFilterFactory { type Filter: CompactionFilter; /// Returns a CompactionFilter for the compaction process fn create(&mut self, context: CompactionFilterContext) -> Self::Filter; /// Returns a name that identifies this compaction filter factory. fn name(&self) -> &CStr; } pub unsafe extern "C" fn destructor_callback(raw_self: *mut c_void) where F: CompactionFilterFactory, { drop(Box::from_raw(raw_self as *mut F)); } pub unsafe extern "C" fn name_callback(raw_self: *mut c_void) -> *const c_char where F: CompactionFilterFactory, { let self_ = &*(raw_self.cast_const() as *const F); self_.name().as_ptr() } /// Context information of a compaction run pub struct CompactionFilterContext { /// Does this compaction run include all data files pub is_full_compaction: bool, /// Is this compaction requested by the client (true), /// or is it occurring as an automatic compaction process pub is_manual_compaction: bool, } impl CompactionFilterContext { unsafe fn from_raw(ptr: *mut ffi::rocksdb_compactionfiltercontext_t) -> Self { let is_full_compaction = ffi::rocksdb_compactionfiltercontext_is_full_compaction(ptr) != 0; let is_manual_compaction = ffi::rocksdb_compactionfiltercontext_is_manual_compaction(ptr) != 0; Self { is_full_compaction, is_manual_compaction, } } } pub unsafe extern "C" fn create_compaction_filter_callback( raw_self: *mut c_void, context: *mut ffi::rocksdb_compactionfiltercontext_t, ) -> *mut ffi::rocksdb_compactionfilter_t where F: CompactionFilterFactory, { let self_ = &mut *(raw_self as *mut F); let context = CompactionFilterContext::from_raw(context); let filter = Box::new(self_.create(context)); let filter_ptr = Box::into_raw(filter); ffi::rocksdb_compactionfilter_create( filter_ptr as *mut c_void, Some(compaction_filter::destructor_callback::), Some(compaction_filter::filter_callback::), Some(compaction_filter::name_callback::), ) } #[cfg(test)] mod tests { use super::*; use crate::compaction_filter::Decision; use crate::{Options, DB}; use std::ffi::CString; struct CountFilter(u16, CString); impl CompactionFilter for CountFilter { fn filter(&mut self, _level: u32, _key: &[u8], _value: &[u8]) -> crate::CompactionDecision { self.0 += 1; if self.0 > 2 { Decision::Remove } else { Decision::Keep } } fn name(&self) -> &CStr { &self.1 } } struct TestFactory(CString); impl CompactionFilterFactory for TestFactory { type Filter = CountFilter; fn create(&mut self, _context: CompactionFilterContext) -> Self::Filter { CountFilter(0, CString::new("CountFilter").unwrap()) } fn name(&self) -> &CStr { &self.0 } } #[test] fn compaction_filter_factory_test() { let tempdir = tempfile::Builder::new() .prefix("_rust_rocksdb_filter_factory_test") .tempdir() .expect("Failed to create temporary path for the _rust_rocksdb_filter_factory_test."); let path = tempdir.path(); let mut opts = Options::default(); opts.create_if_missing(true); opts.set_compaction_filter_factory(TestFactory(CString::new("TestFactory").unwrap())); { let db = DB::open(&opts, path).unwrap(); let _r = db.put(b"k1", b"a"); let _r = db.put(b"_rk", b"b"); let _r = db.put(b"%k", b"c"); db.compact_range(None::<&[u8]>, None::<&[u8]>); assert_eq!(db.get(b"%k1").unwrap(), None); } let result = DB::destroy(&opts, path); assert!(result.is_ok()); } } rocksdb-0.23.0/src/comparator.rs000064400000000000000000000075571046102023000146570ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // use libc::{c_char, c_int, c_uchar, c_void, size_t}; use std::cmp::Ordering; use std::ffi::CString; use std::slice; pub type CompareFn = dyn Fn(&[u8], &[u8]) -> Ordering; pub type CompareTsFn = dyn Fn(&[u8], &[u8]) -> Ordering; pub type CompareWithoutTsFn = dyn Fn(&[u8], bool, &[u8], bool) -> Ordering; pub struct ComparatorCallback { pub name: CString, pub compare_fn: Box, } impl ComparatorCallback { pub unsafe extern "C" fn destructor_callback(raw_cb: *mut c_void) { drop(Box::from_raw(raw_cb as *mut Self)); } pub unsafe extern "C" fn name_callback(raw_cb: *mut c_void) -> *const c_char { let cb: &mut Self = &mut *(raw_cb as *mut Self); let ptr = cb.name.as_ptr(); ptr as *const c_char } pub unsafe extern "C" fn compare_callback( raw_cb: *mut c_void, a_raw: *const c_char, a_len: size_t, b_raw: *const c_char, b_len: size_t, ) -> c_int { let cb: &mut Self = &mut *(raw_cb as *mut Self); let a: &[u8] = slice::from_raw_parts(a_raw as *const u8, a_len); let b: &[u8] = slice::from_raw_parts(b_raw as *const u8, b_len); (cb.compare_fn)(a, b) as c_int } } pub struct ComparatorWithTsCallback { pub name: CString, pub compare_fn: Box, pub compare_ts_fn: Box, pub compare_without_ts_fn: Box, } impl ComparatorWithTsCallback { pub unsafe extern "C" fn destructor_callback(raw_cb: *mut c_void) { drop(Box::from_raw(raw_cb as *mut Self)); } pub unsafe extern "C" fn name_callback(raw_cb: *mut c_void) -> *const c_char { let cb: &mut Self = &mut *(raw_cb as *mut Self); let ptr = cb.name.as_ptr(); ptr as *const c_char } pub unsafe extern "C" fn compare_callback( raw_cb: *mut c_void, a_raw: *const c_char, a_len: size_t, b_raw: *const c_char, b_len: size_t, ) -> c_int { let cb: &mut Self = &mut *(raw_cb as *mut Self); let a: &[u8] = slice::from_raw_parts(a_raw as *const u8, a_len); let b: &[u8] = slice::from_raw_parts(b_raw as *const u8, b_len); (cb.compare_fn)(a, b) as c_int } pub unsafe extern "C" fn compare_ts_callback( raw_cb: *mut c_void, a_ts_raw: *const c_char, a_ts_len: size_t, b_ts_raw: *const c_char, b_ts_len: size_t, ) -> c_int { let cb: &mut Self = &mut *(raw_cb as *mut Self); let a_ts: &[u8] = slice::from_raw_parts(a_ts_raw as *const u8, a_ts_len); let b_ts: &[u8] = slice::from_raw_parts(b_ts_raw as *const u8, b_ts_len); (cb.compare_ts_fn)(a_ts, b_ts) as c_int } pub unsafe extern "C" fn compare_without_ts_callback( raw_cb: *mut c_void, a_raw: *const c_char, a_len: size_t, a_has_ts_raw: c_uchar, b_raw: *const c_char, b_len: size_t, b_has_ts_raw: c_uchar, ) -> c_int { let cb: &mut Self = &mut *(raw_cb as *mut Self); let a: &[u8] = slice::from_raw_parts(a_raw as *const u8, a_len); let a_has_ts = a_has_ts_raw != 0; let b: &[u8] = slice::from_raw_parts(b_raw as *const u8, b_len); let b_has_ts = b_has_ts_raw != 0; (cb.compare_without_ts_fn)(a, a_has_ts, b, b_has_ts) as c_int } } rocksdb-0.23.0/src/db.rs000064400000000000000000002604741046102023000130740ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // use crate::{ column_family::AsColumnFamilyRef, column_family::BoundColumnFamily, column_family::UnboundColumnFamily, db_options::OptionsMustOutliveDB, ffi, ffi_util::{from_cstr, opt_bytes_to_ptr, raw_data, to_cpath, CStrLike}, ColumnFamily, ColumnFamilyDescriptor, CompactOptions, DBIteratorWithThreadMode, DBPinnableSlice, DBRawIteratorWithThreadMode, DBWALIterator, Direction, Error, FlushOptions, IngestExternalFileOptions, IteratorMode, Options, ReadOptions, SnapshotWithThreadMode, WaitForCompactOptions, WriteBatch, WriteOptions, DEFAULT_COLUMN_FAMILY_NAME, }; use crate::column_family::ColumnFamilyTtl; use crate::ffi_util::CSlice; use libc::{self, c_char, c_int, c_uchar, c_void, size_t}; use std::collections::BTreeMap; use std::ffi::{CStr, CString}; use std::fmt; use std::fs; use std::iter; use std::path::Path; use std::path::PathBuf; use std::ptr; use std::slice; use std::str; use std::sync::Arc; use std::sync::RwLock; use std::time::Duration; /// Marker trait to specify single or multi threaded column family alternations for /// [`DBWithThreadMode`] /// /// This arrangement makes differences in self mutability and return type in /// some of `DBWithThreadMode` methods. /// /// While being a marker trait to be generic over `DBWithThreadMode`, this trait /// also has a minimum set of not-encapsulated internal methods between /// [`SingleThreaded`] and [`MultiThreaded`]. These methods aren't expected to be /// called and defined externally. pub trait ThreadMode { /// Internal implementation for storing column family handles fn new_cf_map_internal( cf_map: BTreeMap, ) -> Self; /// Internal implementation for dropping column family handles fn drop_all_cfs_internal(&mut self); } /// Actual marker type for the marker trait `ThreadMode`, which holds /// a collection of column families without synchronization primitive, providing /// no overhead for the single-threaded column family alternations. The other /// mode is [`MultiThreaded`]. /// /// See [`DB`] for more details, including performance implications for each mode pub struct SingleThreaded { pub(crate) cfs: BTreeMap, } /// Actual marker type for the marker trait `ThreadMode`, which holds /// a collection of column families wrapped in a RwLock to be mutated /// concurrently. The other mode is [`SingleThreaded`]. /// /// See [`DB`] for more details, including performance implications for each mode pub struct MultiThreaded { pub(crate) cfs: RwLock>>, } impl ThreadMode for SingleThreaded { fn new_cf_map_internal( cfs: BTreeMap, ) -> Self { Self { cfs: cfs .into_iter() .map(|(n, c)| (n, ColumnFamily { inner: c })) .collect(), } } fn drop_all_cfs_internal(&mut self) { // Cause all ColumnFamily objects to be Drop::drop()-ed. self.cfs.clear(); } } impl ThreadMode for MultiThreaded { fn new_cf_map_internal( cfs: BTreeMap, ) -> Self { Self { cfs: RwLock::new( cfs.into_iter() .map(|(n, c)| (n, Arc::new(UnboundColumnFamily { inner: c }))) .collect(), ), } } fn drop_all_cfs_internal(&mut self) { // Cause all UnboundColumnFamily objects to be Drop::drop()-ed. self.cfs.write().unwrap().clear(); } } /// Get underlying `rocksdb_t`. pub trait DBInner { fn inner(&self) -> *mut ffi::rocksdb_t; } /// A helper type to implement some common methods for [`DBWithThreadMode`] /// and [`OptimisticTransactionDB`]. /// /// [`OptimisticTransactionDB`]: crate::OptimisticTransactionDB pub struct DBCommon { pub(crate) inner: D, cfs: T, // Column families are held differently depending on thread mode path: PathBuf, _outlive: Vec, } /// Minimal set of DB-related methods, intended to be generic over /// `DBWithThreadMode`. Mainly used internally pub trait DBAccess { unsafe fn create_snapshot(&self) -> *const ffi::rocksdb_snapshot_t; unsafe fn release_snapshot(&self, snapshot: *const ffi::rocksdb_snapshot_t); unsafe fn create_iterator(&self, readopts: &ReadOptions) -> *mut ffi::rocksdb_iterator_t; unsafe fn create_iterator_cf( &self, cf_handle: *mut ffi::rocksdb_column_family_handle_t, readopts: &ReadOptions, ) -> *mut ffi::rocksdb_iterator_t; fn get_opt>( &self, key: K, readopts: &ReadOptions, ) -> Result>, Error>; fn get_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, readopts: &ReadOptions, ) -> Result>, Error>; fn get_pinned_opt>( &self, key: K, readopts: &ReadOptions, ) -> Result, Error>; fn get_pinned_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, readopts: &ReadOptions, ) -> Result, Error>; fn multi_get_opt( &self, keys: I, readopts: &ReadOptions, ) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator; fn multi_get_cf_opt<'b, K, I, W>( &self, keys_cf: I, readopts: &ReadOptions, ) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, W: AsColumnFamilyRef + 'b; } impl DBAccess for DBCommon { unsafe fn create_snapshot(&self) -> *const ffi::rocksdb_snapshot_t { ffi::rocksdb_create_snapshot(self.inner.inner()) } unsafe fn release_snapshot(&self, snapshot: *const ffi::rocksdb_snapshot_t) { ffi::rocksdb_release_snapshot(self.inner.inner(), snapshot); } unsafe fn create_iterator(&self, readopts: &ReadOptions) -> *mut ffi::rocksdb_iterator_t { ffi::rocksdb_create_iterator(self.inner.inner(), readopts.inner) } unsafe fn create_iterator_cf( &self, cf_handle: *mut ffi::rocksdb_column_family_handle_t, readopts: &ReadOptions, ) -> *mut ffi::rocksdb_iterator_t { ffi::rocksdb_create_iterator_cf(self.inner.inner(), readopts.inner, cf_handle) } fn get_opt>( &self, key: K, readopts: &ReadOptions, ) -> Result>, Error> { self.get_opt(key, readopts) } fn get_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, readopts: &ReadOptions, ) -> Result>, Error> { self.get_cf_opt(cf, key, readopts) } fn get_pinned_opt>( &self, key: K, readopts: &ReadOptions, ) -> Result, Error> { self.get_pinned_opt(key, readopts) } fn get_pinned_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, readopts: &ReadOptions, ) -> Result, Error> { self.get_pinned_cf_opt(cf, key, readopts) } fn multi_get_opt( &self, keys: Iter, readopts: &ReadOptions, ) -> Vec>, Error>> where K: AsRef<[u8]>, Iter: IntoIterator, { self.multi_get_opt(keys, readopts) } fn multi_get_cf_opt<'b, K, Iter, W>( &self, keys_cf: Iter, readopts: &ReadOptions, ) -> Vec>, Error>> where K: AsRef<[u8]>, Iter: IntoIterator, W: AsColumnFamilyRef + 'b, { self.multi_get_cf_opt(keys_cf, readopts) } } pub struct DBWithThreadModeInner { inner: *mut ffi::rocksdb_t, } impl DBInner for DBWithThreadModeInner { fn inner(&self) -> *mut ffi::rocksdb_t { self.inner } } impl Drop for DBWithThreadModeInner { fn drop(&mut self) { unsafe { ffi::rocksdb_close(self.inner); } } } /// A type alias to RocksDB database. /// /// See crate level documentation for a simple usage example. /// See [`DBCommon`] for full list of methods. pub type DBWithThreadMode = DBCommon; /// A type alias to DB instance type with the single-threaded column family /// creations/deletions /// /// # Compatibility and multi-threaded mode /// /// Previously, [`DB`] was defined as a direct `struct`. Now, it's type-aliased for /// compatibility. Use `DBCommon` for multi-threaded /// column family alternations. /// /// # Limited performance implication for single-threaded mode /// /// Even with [`SingleThreaded`], almost all of RocksDB operations is /// multi-threaded unless the underlying RocksDB instance is /// specifically configured otherwise. `SingleThreaded` only forces /// serialization of column family alternations by requiring `&mut self` of DB /// instance due to its wrapper implementation details. /// /// # Multi-threaded mode /// /// [`MultiThreaded`] can be appropriate for the situation of multi-threaded /// workload including multi-threaded column family alternations, costing the /// RwLock overhead inside `DB`. #[cfg(not(feature = "multi-threaded-cf"))] pub type DB = DBWithThreadMode; #[cfg(feature = "multi-threaded-cf")] pub type DB = DBWithThreadMode; // Safety note: auto-implementing Send on most db-related types is prevented by the inner FFI // pointer. In most cases, however, this pointer is Send-safe because it is never aliased and // rocksdb internally does not rely on thread-local information for its user-exposed types. unsafe impl Send for DBCommon {} // Sync is similarly safe for many types because they do not expose interior mutability, and their // use within the rocksdb library is generally behind a const reference unsafe impl Sync for DBCommon {} // Specifies whether open DB for read only. enum AccessType<'a> { ReadWrite, ReadOnly { error_if_log_file_exist: bool }, Secondary { secondary_path: &'a Path }, WithTTL { ttl: Duration }, } /// Methods of `DBWithThreadMode`. impl DBWithThreadMode { /// Opens a database with default options. pub fn open_default>(path: P) -> Result { let mut opts = Options::default(); opts.create_if_missing(true); Self::open(&opts, path) } /// Opens the database with the specified options. pub fn open>(opts: &Options, path: P) -> Result { Self::open_cf(opts, path, None::<&str>) } /// Opens the database for read only with the specified options. pub fn open_for_read_only>( opts: &Options, path: P, error_if_log_file_exist: bool, ) -> Result { Self::open_cf_for_read_only(opts, path, None::<&str>, error_if_log_file_exist) } /// Opens the database as a secondary. pub fn open_as_secondary>( opts: &Options, primary_path: P, secondary_path: P, ) -> Result { Self::open_cf_as_secondary(opts, primary_path, secondary_path, None::<&str>) } /// Opens the database with a Time to Live compaction filter. /// /// This applies the given `ttl` to all column families created without an explicit TTL. /// See [`DB::open_cf_descriptors_with_ttl`] for more control over individual column family TTLs. pub fn open_with_ttl>( opts: &Options, path: P, ttl: Duration, ) -> Result { Self::open_cf_descriptors_with_ttl(opts, path, std::iter::empty(), ttl) } /// Opens the database with a Time to Live compaction filter and column family names. /// /// Column families opened using this function will be created with default `Options`. pub fn open_cf_with_ttl( opts: &Options, path: P, cfs: I, ttl: Duration, ) -> Result where P: AsRef, I: IntoIterator, N: AsRef, { let cfs = cfs .into_iter() .map(|name| ColumnFamilyDescriptor::new(name.as_ref(), Options::default())); Self::open_cf_descriptors_with_ttl(opts, path, cfs, ttl) } /// Opens a database with the given database with a Time to Live compaction filter and /// column family descriptors. /// /// Applies the provided `ttl` as the default TTL for all column families. /// Column families will inherit this TTL by default, unless their descriptor explicitly /// sets a different TTL using [`ColumnFamilyTtl::Duration`] or opts out using [`ColumnFamilyTtl::Disabled`]. /// /// *NOTE*: The `default` column family is opened with `Options::default()` unless /// explicitly configured within the `cfs` iterator. /// To customize the `default` column family's options, include a `ColumnFamilyDescriptor` /// with the name "default" in the `cfs` iterator. /// /// If you want to open `default` cf with different options, set them explicitly in `cfs`. pub fn open_cf_descriptors_with_ttl( opts: &Options, path: P, cfs: I, ttl: Duration, ) -> Result where P: AsRef, I: IntoIterator, { Self::open_cf_descriptors_internal(opts, path, cfs, &AccessType::WithTTL { ttl }) } /// Opens a database with the given database options and column family names. /// /// Column families opened using this function will be created with default `Options`. pub fn open_cf(opts: &Options, path: P, cfs: I) -> Result where P: AsRef, I: IntoIterator, N: AsRef, { let cfs = cfs .into_iter() .map(|name| ColumnFamilyDescriptor::new(name.as_ref(), Options::default())); Self::open_cf_descriptors_internal(opts, path, cfs, &AccessType::ReadWrite) } /// Opens a database with the given database options and column family names. /// /// Column families opened using given `Options`. pub fn open_cf_with_opts(opts: &Options, path: P, cfs: I) -> Result where P: AsRef, I: IntoIterator, N: AsRef, { let cfs = cfs .into_iter() .map(|(name, opts)| ColumnFamilyDescriptor::new(name.as_ref(), opts)); Self::open_cf_descriptors(opts, path, cfs) } /// Opens a database for read only with the given database options and column family names. /// *NOTE*: `default` column family is opened with `Options::default()`. /// If you want to open `default` cf with different options, set them explicitly in `cfs`. pub fn open_cf_for_read_only( opts: &Options, path: P, cfs: I, error_if_log_file_exist: bool, ) -> Result where P: AsRef, I: IntoIterator, N: AsRef, { let cfs = cfs .into_iter() .map(|name| ColumnFamilyDescriptor::new(name.as_ref(), Options::default())); Self::open_cf_descriptors_internal( opts, path, cfs, &AccessType::ReadOnly { error_if_log_file_exist, }, ) } /// Opens a database for read only with the given database options and column family names. /// *NOTE*: `default` column family is opened with `Options::default()`. /// If you want to open `default` cf with different options, set them explicitly in `cfs`. pub fn open_cf_with_opts_for_read_only( db_opts: &Options, path: P, cfs: I, error_if_log_file_exist: bool, ) -> Result where P: AsRef, I: IntoIterator, N: AsRef, { let cfs = cfs .into_iter() .map(|(name, cf_opts)| ColumnFamilyDescriptor::new(name.as_ref(), cf_opts)); Self::open_cf_descriptors_internal( db_opts, path, cfs, &AccessType::ReadOnly { error_if_log_file_exist, }, ) } /// Opens a database for ready only with the given database options and /// column family descriptors. /// *NOTE*: `default` column family is opened with `Options::default()`. /// If you want to open `default` cf with different options, set them explicitly in `cfs`. pub fn open_cf_descriptors_read_only( opts: &Options, path: P, cfs: I, error_if_log_file_exist: bool, ) -> Result where P: AsRef, I: IntoIterator, { Self::open_cf_descriptors_internal( opts, path, cfs, &AccessType::ReadOnly { error_if_log_file_exist, }, ) } /// Opens the database as a secondary with the given database options and column family names. /// *NOTE*: `default` column family is opened with `Options::default()`. /// If you want to open `default` cf with different options, set them explicitly in `cfs`. pub fn open_cf_as_secondary( opts: &Options, primary_path: P, secondary_path: P, cfs: I, ) -> Result where P: AsRef, I: IntoIterator, N: AsRef, { let cfs = cfs .into_iter() .map(|name| ColumnFamilyDescriptor::new(name.as_ref(), Options::default())); Self::open_cf_descriptors_internal( opts, primary_path, cfs, &AccessType::Secondary { secondary_path: secondary_path.as_ref(), }, ) } /// Opens the database as a secondary with the given database options and /// column family descriptors. /// *NOTE*: `default` column family is opened with `Options::default()`. /// If you want to open `default` cf with different options, set them explicitly in `cfs`. pub fn open_cf_descriptors_as_secondary( opts: &Options, path: P, secondary_path: P, cfs: I, ) -> Result where P: AsRef, I: IntoIterator, { Self::open_cf_descriptors_internal( opts, path, cfs, &AccessType::Secondary { secondary_path: secondary_path.as_ref(), }, ) } /// Opens a database with the given database options and column family descriptors. /// *NOTE*: `default` column family is opened with `Options::default()`. /// If you want to open `default` cf with different options, set them explicitly in `cfs`. pub fn open_cf_descriptors(opts: &Options, path: P, cfs: I) -> Result where P: AsRef, I: IntoIterator, { Self::open_cf_descriptors_internal(opts, path, cfs, &AccessType::ReadWrite) } /// Internal implementation for opening RocksDB. fn open_cf_descriptors_internal( opts: &Options, path: P, cfs: I, access_type: &AccessType, ) -> Result where P: AsRef, I: IntoIterator, { let cfs: Vec<_> = cfs.into_iter().collect(); let outlive = iter::once(opts.outlive.clone()) .chain(cfs.iter().map(|cf| cf.options.outlive.clone())) .collect(); let cpath = to_cpath(&path)?; if let Err(e) = fs::create_dir_all(&path) { return Err(Error::new(format!( "Failed to create RocksDB directory: `{e:?}`." ))); } let db: *mut ffi::rocksdb_t; let mut cf_map = BTreeMap::new(); if cfs.is_empty() { db = Self::open_raw(opts, &cpath, access_type)?; } else { let mut cfs_v = cfs; // Always open the default column family. if !cfs_v.iter().any(|cf| cf.name == DEFAULT_COLUMN_FAMILY_NAME) { cfs_v.push(ColumnFamilyDescriptor { name: String::from(DEFAULT_COLUMN_FAMILY_NAME), options: Options::default(), ttl: ColumnFamilyTtl::SameAsDb, }); } // We need to store our CStrings in an intermediate vector // so that their pointers remain valid. let c_cfs: Vec = cfs_v .iter() .map(|cf| CString::new(cf.name.as_bytes()).unwrap()) .collect(); let cfnames: Vec<_> = c_cfs.iter().map(|cf| cf.as_ptr()).collect(); // These handles will be populated by DB. let mut cfhandles: Vec<_> = cfs_v.iter().map(|_| ptr::null_mut()).collect(); let cfopts: Vec<_> = cfs_v .iter() .map(|cf| cf.options.inner.cast_const()) .collect(); db = Self::open_cf_raw( opts, &cpath, &cfs_v, &cfnames, &cfopts, &mut cfhandles, access_type, )?; for handle in &cfhandles { if handle.is_null() { return Err(Error::new( "Received null column family handle from DB.".to_owned(), )); } } for (cf_desc, inner) in cfs_v.iter().zip(cfhandles) { cf_map.insert(cf_desc.name.clone(), inner); } } if db.is_null() { return Err(Error::new("Could not initialize database.".to_owned())); } Ok(Self { inner: DBWithThreadModeInner { inner: db }, path: path.as_ref().to_path_buf(), cfs: T::new_cf_map_internal(cf_map), _outlive: outlive, }) } fn open_raw( opts: &Options, cpath: &CString, access_type: &AccessType, ) -> Result<*mut ffi::rocksdb_t, Error> { let db = unsafe { match *access_type { AccessType::ReadOnly { error_if_log_file_exist, } => ffi_try!(ffi::rocksdb_open_for_read_only( opts.inner, cpath.as_ptr(), c_uchar::from(error_if_log_file_exist), )), AccessType::ReadWrite => { ffi_try!(ffi::rocksdb_open(opts.inner, cpath.as_ptr())) } AccessType::Secondary { secondary_path } => { ffi_try!(ffi::rocksdb_open_as_secondary( opts.inner, cpath.as_ptr(), to_cpath(secondary_path)?.as_ptr(), )) } AccessType::WithTTL { ttl } => ffi_try!(ffi::rocksdb_open_with_ttl( opts.inner, cpath.as_ptr(), ttl.as_secs() as c_int, )), } }; Ok(db) } #[allow(clippy::pedantic)] fn open_cf_raw( opts: &Options, cpath: &CString, cfs_v: &[ColumnFamilyDescriptor], cfnames: &[*const c_char], cfopts: &[*const ffi::rocksdb_options_t], cfhandles: &mut [*mut ffi::rocksdb_column_family_handle_t], access_type: &AccessType, ) -> Result<*mut ffi::rocksdb_t, Error> { let db = unsafe { match *access_type { AccessType::ReadOnly { error_if_log_file_exist, } => ffi_try!(ffi::rocksdb_open_for_read_only_column_families( opts.inner, cpath.as_ptr(), cfs_v.len() as c_int, cfnames.as_ptr(), cfopts.as_ptr(), cfhandles.as_mut_ptr(), c_uchar::from(error_if_log_file_exist), )), AccessType::ReadWrite => ffi_try!(ffi::rocksdb_open_column_families( opts.inner, cpath.as_ptr(), cfs_v.len() as c_int, cfnames.as_ptr(), cfopts.as_ptr(), cfhandles.as_mut_ptr(), )), AccessType::Secondary { secondary_path } => { ffi_try!(ffi::rocksdb_open_as_secondary_column_families( opts.inner, cpath.as_ptr(), to_cpath(secondary_path)?.as_ptr(), cfs_v.len() as c_int, cfnames.as_ptr(), cfopts.as_ptr(), cfhandles.as_mut_ptr(), )) } AccessType::WithTTL { ttl } => { let ttls: Vec<_> = cfs_v .iter() .map(|cf| match cf.ttl { ColumnFamilyTtl::Disabled => i32::MAX, ColumnFamilyTtl::Duration(duration) => duration.as_secs() as i32, ColumnFamilyTtl::SameAsDb => ttl.as_secs() as i32, }) .collect(); ffi_try!(ffi::rocksdb_open_column_families_with_ttl( opts.inner, cpath.as_ptr(), cfs_v.len() as c_int, cfnames.as_ptr(), cfopts.as_ptr(), cfhandles.as_mut_ptr(), ttls.as_ptr(), )) } } }; Ok(db) } /// Removes the database entries in the range `["from", "to")` using given write options. pub fn delete_range_cf_opt>( &self, cf: &impl AsColumnFamilyRef, from: K, to: K, writeopts: &WriteOptions, ) -> Result<(), Error> { let from = from.as_ref(); let to = to.as_ref(); unsafe { ffi_try!(ffi::rocksdb_delete_range_cf( self.inner.inner(), writeopts.inner, cf.inner(), from.as_ptr() as *const c_char, from.len() as size_t, to.as_ptr() as *const c_char, to.len() as size_t, )); Ok(()) } } /// Removes the database entries in the range `["from", "to")` using default write options. pub fn delete_range_cf>( &self, cf: &impl AsColumnFamilyRef, from: K, to: K, ) -> Result<(), Error> { self.delete_range_cf_opt(cf, from, to, &WriteOptions::default()) } pub fn write_opt(&self, batch: WriteBatch, writeopts: &WriteOptions) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_write( self.inner.inner(), writeopts.inner, batch.inner )); } Ok(()) } pub fn write(&self, batch: WriteBatch) -> Result<(), Error> { self.write_opt(batch, &WriteOptions::default()) } pub fn write_without_wal(&self, batch: WriteBatch) -> Result<(), Error> { let mut wo = WriteOptions::new(); wo.disable_wal(true); self.write_opt(batch, &wo) } } /// Common methods of `DBWithThreadMode` and `OptimisticTransactionDB`. impl DBCommon { pub(crate) fn new(inner: D, cfs: T, path: PathBuf, outlive: Vec) -> Self { Self { inner, cfs, path, _outlive: outlive, } } pub fn list_cf>(opts: &Options, path: P) -> Result, Error> { let cpath = to_cpath(path)?; let mut length = 0; unsafe { let ptr = ffi_try!(ffi::rocksdb_list_column_families( opts.inner, cpath.as_ptr(), &mut length, )); let vec = slice::from_raw_parts(ptr, length) .iter() .map(|ptr| CStr::from_ptr(*ptr).to_string_lossy().into_owned()) .collect(); ffi::rocksdb_list_column_families_destroy(ptr, length); Ok(vec) } } pub fn destroy>(opts: &Options, path: P) -> Result<(), Error> { let cpath = to_cpath(path)?; unsafe { ffi_try!(ffi::rocksdb_destroy_db(opts.inner, cpath.as_ptr())); } Ok(()) } pub fn repair>(opts: &Options, path: P) -> Result<(), Error> { let cpath = to_cpath(path)?; unsafe { ffi_try!(ffi::rocksdb_repair_db(opts.inner, cpath.as_ptr())); } Ok(()) } pub fn path(&self) -> &Path { self.path.as_path() } /// Flushes the WAL buffer. If `sync` is set to `true`, also syncs /// the data to disk. pub fn flush_wal(&self, sync: bool) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_flush_wal( self.inner.inner(), c_uchar::from(sync) )); } Ok(()) } /// Flushes database memtables to SST files on the disk. pub fn flush_opt(&self, flushopts: &FlushOptions) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_flush(self.inner.inner(), flushopts.inner)); } Ok(()) } /// Flushes database memtables to SST files on the disk using default options. pub fn flush(&self) -> Result<(), Error> { self.flush_opt(&FlushOptions::default()) } /// Flushes database memtables to SST files on the disk for a given column family. pub fn flush_cf_opt( &self, cf: &impl AsColumnFamilyRef, flushopts: &FlushOptions, ) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_flush_cf( self.inner.inner(), flushopts.inner, cf.inner() )); } Ok(()) } /// Flushes multiple column families. /// /// If atomic flush is not enabled, it is equivalent to calling flush_cf multiple times. /// If atomic flush is enabled, it will flush all column families specified in `cfs` up to the latest sequence /// number at the time when flush is requested. pub fn flush_cfs_opt( &self, cfs: &[&impl AsColumnFamilyRef], opts: &FlushOptions, ) -> Result<(), Error> { let mut cfs = cfs.iter().map(|cf| cf.inner()).collect::>(); unsafe { ffi_try!(ffi::rocksdb_flush_cfs( self.inner.inner(), opts.inner, cfs.as_mut_ptr(), cfs.len() as libc::c_int, )); } Ok(()) } /// Flushes database memtables to SST files on the disk for a given column family using default /// options. pub fn flush_cf(&self, cf: &impl AsColumnFamilyRef) -> Result<(), Error> { self.flush_cf_opt(cf, &FlushOptions::default()) } /// Return the bytes associated with a key value with read options. If you only intend to use /// the vector returned temporarily, consider using [`get_pinned_opt`](#method.get_pinned_opt) /// to avoid unnecessary memory copy. pub fn get_opt>( &self, key: K, readopts: &ReadOptions, ) -> Result>, Error> { self.get_pinned_opt(key, readopts) .map(|x| x.map(|v| v.as_ref().to_vec())) } /// Return the bytes associated with a key value. If you only intend to use the vector returned /// temporarily, consider using [`get_pinned`](#method.get_pinned) to avoid unnecessary memory /// copy. pub fn get>(&self, key: K) -> Result>, Error> { self.get_opt(key.as_ref(), &ReadOptions::default()) } /// Return the bytes associated with a key value and the given column family with read options. /// If you only intend to use the vector returned temporarily, consider using /// [`get_pinned_cf_opt`](#method.get_pinned_cf_opt) to avoid unnecessary memory. pub fn get_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, readopts: &ReadOptions, ) -> Result>, Error> { self.get_pinned_cf_opt(cf, key, readopts) .map(|x| x.map(|v| v.as_ref().to_vec())) } /// Return the bytes associated with a key value and the given column family. If you only /// intend to use the vector returned temporarily, consider using /// [`get_pinned_cf`](#method.get_pinned_cf) to avoid unnecessary memory. pub fn get_cf>( &self, cf: &impl AsColumnFamilyRef, key: K, ) -> Result>, Error> { self.get_cf_opt(cf, key.as_ref(), &ReadOptions::default()) } /// Return the value associated with a key using RocksDB's PinnableSlice /// so as to avoid unnecessary memory copy. pub fn get_pinned_opt>( &self, key: K, readopts: &ReadOptions, ) -> Result, Error> { if readopts.inner.is_null() { return Err(Error::new( "Unable to create RocksDB read options. This is a fairly trivial call, and its \ failure may be indicative of a mis-compiled or mis-loaded RocksDB library." .to_owned(), )); } let key = key.as_ref(); unsafe { let val = ffi_try!(ffi::rocksdb_get_pinned( self.inner.inner(), readopts.inner, key.as_ptr() as *const c_char, key.len() as size_t, )); if val.is_null() { Ok(None) } else { Ok(Some(DBPinnableSlice::from_c(val))) } } } /// Return the value associated with a key using RocksDB's PinnableSlice /// so as to avoid unnecessary memory copy. Similar to get_pinned_opt but /// leverages default options. pub fn get_pinned>(&self, key: K) -> Result, Error> { self.get_pinned_opt(key, &ReadOptions::default()) } /// Return the value associated with a key using RocksDB's PinnableSlice /// so as to avoid unnecessary memory copy. Similar to get_pinned_opt but /// allows specifying ColumnFamily pub fn get_pinned_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, readopts: &ReadOptions, ) -> Result, Error> { if readopts.inner.is_null() { return Err(Error::new( "Unable to create RocksDB read options. This is a fairly trivial call, and its \ failure may be indicative of a mis-compiled or mis-loaded RocksDB library." .to_owned(), )); } let key = key.as_ref(); unsafe { let val = ffi_try!(ffi::rocksdb_get_pinned_cf( self.inner.inner(), readopts.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, )); if val.is_null() { Ok(None) } else { Ok(Some(DBPinnableSlice::from_c(val))) } } } /// Return the value associated with a key using RocksDB's PinnableSlice /// so as to avoid unnecessary memory copy. Similar to get_pinned_cf_opt but /// leverages default options. pub fn get_pinned_cf>( &self, cf: &impl AsColumnFamilyRef, key: K, ) -> Result, Error> { self.get_pinned_cf_opt(cf, key, &ReadOptions::default()) } /// Return the values associated with the given keys. pub fn multi_get(&self, keys: I) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, { self.multi_get_opt(keys, &ReadOptions::default()) } /// Return the values associated with the given keys using read options. pub fn multi_get_opt( &self, keys: I, readopts: &ReadOptions, ) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, { let (keys, keys_sizes): (Vec>, Vec<_>) = keys .into_iter() .map(|k| { let k = k.as_ref(); (Box::from(k), k.len()) }) .unzip(); let ptr_keys: Vec<_> = keys.iter().map(|k| k.as_ptr() as *const c_char).collect(); let mut values = vec![ptr::null_mut(); keys.len()]; let mut values_sizes = vec![0_usize; keys.len()]; let mut errors = vec![ptr::null_mut(); keys.len()]; unsafe { ffi::rocksdb_multi_get( self.inner.inner(), readopts.inner, ptr_keys.len(), ptr_keys.as_ptr(), keys_sizes.as_ptr(), values.as_mut_ptr(), values_sizes.as_mut_ptr(), errors.as_mut_ptr(), ); } convert_values(values, values_sizes, errors) } /// Return the values associated with the given keys and column families. pub fn multi_get_cf<'a, 'b: 'a, K, I, W>( &'a self, keys: I, ) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, W: 'b + AsColumnFamilyRef, { self.multi_get_cf_opt(keys, &ReadOptions::default()) } /// Return the values associated with the given keys and column families using read options. pub fn multi_get_cf_opt<'a, 'b: 'a, K, I, W>( &'a self, keys: I, readopts: &ReadOptions, ) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, W: 'b + AsColumnFamilyRef, { let (cfs_and_keys, keys_sizes): (Vec<(_, Box<[u8]>)>, Vec<_>) = keys .into_iter() .map(|(cf, key)| { let key = key.as_ref(); ((cf, Box::from(key)), key.len()) }) .unzip(); let ptr_keys: Vec<_> = cfs_and_keys .iter() .map(|(_, k)| k.as_ptr() as *const c_char) .collect(); let ptr_cfs: Vec<_> = cfs_and_keys .iter() .map(|(c, _)| c.inner().cast_const()) .collect(); let mut values = vec![ptr::null_mut(); ptr_keys.len()]; let mut values_sizes = vec![0_usize; ptr_keys.len()]; let mut errors = vec![ptr::null_mut(); ptr_keys.len()]; unsafe { ffi::rocksdb_multi_get_cf( self.inner.inner(), readopts.inner, ptr_cfs.as_ptr(), ptr_keys.len(), ptr_keys.as_ptr(), keys_sizes.as_ptr(), values.as_mut_ptr(), values_sizes.as_mut_ptr(), errors.as_mut_ptr(), ); } convert_values(values, values_sizes, errors) } /// Return the values associated with the given keys and the specified column family /// where internally the read requests are processed in batch if block-based table /// SST format is used. It is a more optimized version of multi_get_cf. pub fn batched_multi_get_cf<'a, K, I>( &self, cf: &impl AsColumnFamilyRef, keys: I, sorted_input: bool, ) -> Vec, Error>> where K: AsRef<[u8]> + 'a + ?Sized, I: IntoIterator, { self.batched_multi_get_cf_opt(cf, keys, sorted_input, &ReadOptions::default()) } /// Return the values associated with the given keys and the specified column family /// where internally the read requests are processed in batch if block-based table /// SST format is used. It is a more optimized version of multi_get_cf_opt. pub fn batched_multi_get_cf_opt<'a, K, I>( &self, cf: &impl AsColumnFamilyRef, keys: I, sorted_input: bool, readopts: &ReadOptions, ) -> Vec, Error>> where K: AsRef<[u8]> + 'a + ?Sized, I: IntoIterator, { let (ptr_keys, keys_sizes): (Vec<_>, Vec<_>) = keys .into_iter() .map(|k| { let k = k.as_ref(); (k.as_ptr() as *const c_char, k.len()) }) .unzip(); let mut pinned_values = vec![ptr::null_mut(); ptr_keys.len()]; let mut errors = vec![ptr::null_mut(); ptr_keys.len()]; unsafe { ffi::rocksdb_batched_multi_get_cf( self.inner.inner(), readopts.inner, cf.inner(), ptr_keys.len(), ptr_keys.as_ptr(), keys_sizes.as_ptr(), pinned_values.as_mut_ptr(), errors.as_mut_ptr(), sorted_input, ); pinned_values .into_iter() .zip(errors) .map(|(v, e)| { if e.is_null() { if v.is_null() { Ok(None) } else { Ok(Some(DBPinnableSlice::from_c(v))) } } else { Err(Error::new(crate::ffi_util::error_message(e))) } }) .collect() } } /// Returns `false` if the given key definitely doesn't exist in the database, otherwise returns /// `true`. This function uses default `ReadOptions`. pub fn key_may_exist>(&self, key: K) -> bool { self.key_may_exist_opt(key, &ReadOptions::default()) } /// Returns `false` if the given key definitely doesn't exist in the database, otherwise returns /// `true`. pub fn key_may_exist_opt>(&self, key: K, readopts: &ReadOptions) -> bool { let key = key.as_ref(); unsafe { 0 != ffi::rocksdb_key_may_exist( self.inner.inner(), readopts.inner, key.as_ptr() as *const c_char, key.len() as size_t, ptr::null_mut(), /*value*/ ptr::null_mut(), /*val_len*/ ptr::null(), /*timestamp*/ 0, /*timestamp_len*/ ptr::null_mut(), /*value_found*/ ) } } /// Returns `false` if the given key definitely doesn't exist in the specified column family, /// otherwise returns `true`. This function uses default `ReadOptions`. pub fn key_may_exist_cf>(&self, cf: &impl AsColumnFamilyRef, key: K) -> bool { self.key_may_exist_cf_opt(cf, key, &ReadOptions::default()) } /// Returns `false` if the given key definitely doesn't exist in the specified column family, /// otherwise returns `true`. pub fn key_may_exist_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, readopts: &ReadOptions, ) -> bool { let key = key.as_ref(); 0 != unsafe { ffi::rocksdb_key_may_exist_cf( self.inner.inner(), readopts.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, ptr::null_mut(), /*value*/ ptr::null_mut(), /*val_len*/ ptr::null(), /*timestamp*/ 0, /*timestamp_len*/ ptr::null_mut(), /*value_found*/ ) } } /// If the key definitely does not exist in the database, then this method /// returns `(false, None)`, else `(true, None)` if it may. /// If the key is found in memory, then it returns `(true, Some)`. /// /// This check is potentially lighter-weight than calling `get()`. One way /// to make this lighter weight is to avoid doing any IOs. pub fn key_may_exist_cf_opt_value>( &self, cf: &impl AsColumnFamilyRef, key: K, readopts: &ReadOptions, ) -> (bool, Option) { let key = key.as_ref(); let mut val: *mut c_char = ptr::null_mut(); let mut val_len: usize = 0; let mut value_found: c_uchar = 0; let may_exists = 0 != unsafe { ffi::rocksdb_key_may_exist_cf( self.inner.inner(), readopts.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, &mut val, /*value*/ &mut val_len, /*val_len*/ ptr::null(), /*timestamp*/ 0, /*timestamp_len*/ &mut value_found, /*value_found*/ ) }; // The value is only allocated (using malloc) and returned if it is found and // value_found isn't NULL. In that case the user is responsible for freeing it. if may_exists && value_found != 0 { ( may_exists, Some(unsafe { CSlice::from_raw_parts(val, val_len) }), ) } else { (may_exists, None) } } fn create_inner_cf_handle( &self, name: impl CStrLike, opts: &Options, ) -> Result<*mut ffi::rocksdb_column_family_handle_t, Error> { let cf_name = name.bake().map_err(|err| { Error::new(format!( "Failed to convert path to CString when creating cf: {err}" )) })?; Ok(unsafe { ffi_try!(ffi::rocksdb_create_column_family( self.inner.inner(), opts.inner, cf_name.as_ptr(), )) }) } pub fn iterator<'a: 'b, 'b>( &'a self, mode: IteratorMode, ) -> DBIteratorWithThreadMode<'b, Self> { let readopts = ReadOptions::default(); self.iterator_opt(mode, readopts) } pub fn iterator_opt<'a: 'b, 'b>( &'a self, mode: IteratorMode, readopts: ReadOptions, ) -> DBIteratorWithThreadMode<'b, Self> { DBIteratorWithThreadMode::new(self, readopts, mode) } /// Opens an iterator using the provided ReadOptions. /// This is used when you want to iterate over a specific ColumnFamily with a modified ReadOptions pub fn iterator_cf_opt<'a: 'b, 'b>( &'a self, cf_handle: &impl AsColumnFamilyRef, readopts: ReadOptions, mode: IteratorMode, ) -> DBIteratorWithThreadMode<'b, Self> { DBIteratorWithThreadMode::new_cf(self, cf_handle.inner(), readopts, mode) } /// Opens an iterator with `set_total_order_seek` enabled. /// This must be used to iterate across prefixes when `set_memtable_factory` has been called /// with a Hash-based implementation. pub fn full_iterator<'a: 'b, 'b>( &'a self, mode: IteratorMode, ) -> DBIteratorWithThreadMode<'b, Self> { let mut opts = ReadOptions::default(); opts.set_total_order_seek(true); DBIteratorWithThreadMode::new(self, opts, mode) } pub fn prefix_iterator<'a: 'b, 'b, P: AsRef<[u8]>>( &'a self, prefix: P, ) -> DBIteratorWithThreadMode<'b, Self> { let mut opts = ReadOptions::default(); opts.set_prefix_same_as_start(true); DBIteratorWithThreadMode::new( self, opts, IteratorMode::From(prefix.as_ref(), Direction::Forward), ) } pub fn iterator_cf<'a: 'b, 'b>( &'a self, cf_handle: &impl AsColumnFamilyRef, mode: IteratorMode, ) -> DBIteratorWithThreadMode<'b, Self> { let opts = ReadOptions::default(); DBIteratorWithThreadMode::new_cf(self, cf_handle.inner(), opts, mode) } pub fn full_iterator_cf<'a: 'b, 'b>( &'a self, cf_handle: &impl AsColumnFamilyRef, mode: IteratorMode, ) -> DBIteratorWithThreadMode<'b, Self> { let mut opts = ReadOptions::default(); opts.set_total_order_seek(true); DBIteratorWithThreadMode::new_cf(self, cf_handle.inner(), opts, mode) } pub fn prefix_iterator_cf<'a, P: AsRef<[u8]>>( &'a self, cf_handle: &impl AsColumnFamilyRef, prefix: P, ) -> DBIteratorWithThreadMode<'a, Self> { let mut opts = ReadOptions::default(); opts.set_prefix_same_as_start(true); DBIteratorWithThreadMode::<'a, Self>::new_cf( self, cf_handle.inner(), opts, IteratorMode::From(prefix.as_ref(), Direction::Forward), ) } /// Opens a raw iterator over the database, using the default read options pub fn raw_iterator<'a: 'b, 'b>(&'a self) -> DBRawIteratorWithThreadMode<'b, Self> { let opts = ReadOptions::default(); DBRawIteratorWithThreadMode::new(self, opts) } /// Opens a raw iterator over the given column family, using the default read options pub fn raw_iterator_cf<'a: 'b, 'b>( &'a self, cf_handle: &impl AsColumnFamilyRef, ) -> DBRawIteratorWithThreadMode<'b, Self> { let opts = ReadOptions::default(); DBRawIteratorWithThreadMode::new_cf(self, cf_handle.inner(), opts) } /// Opens a raw iterator over the database, using the given read options pub fn raw_iterator_opt<'a: 'b, 'b>( &'a self, readopts: ReadOptions, ) -> DBRawIteratorWithThreadMode<'b, Self> { DBRawIteratorWithThreadMode::new(self, readopts) } /// Opens a raw iterator over the given column family, using the given read options pub fn raw_iterator_cf_opt<'a: 'b, 'b>( &'a self, cf_handle: &impl AsColumnFamilyRef, readopts: ReadOptions, ) -> DBRawIteratorWithThreadMode<'b, Self> { DBRawIteratorWithThreadMode::new_cf(self, cf_handle.inner(), readopts) } pub fn snapshot(&self) -> SnapshotWithThreadMode { SnapshotWithThreadMode::::new(self) } pub fn put_opt(&self, key: K, value: V, writeopts: &WriteOptions) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi_try!(ffi::rocksdb_put( self.inner.inner(), writeopts.inner, key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, )); Ok(()) } } pub fn put_cf_opt( &self, cf: &impl AsColumnFamilyRef, key: K, value: V, writeopts: &WriteOptions, ) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi_try!(ffi::rocksdb_put_cf( self.inner.inner(), writeopts.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, )); Ok(()) } } /// Set the database entry for "key" to "value" with WriteOptions. /// If "key" already exists, it will coexist with previous entry. /// `Get` with a timestamp ts specified in ReadOptions will return /// the most recent key/value whose timestamp is smaller than or equal to ts. /// Takes an additional argument `ts` as the timestamp. /// Note: the DB must be opened with user defined timestamp enabled. pub fn put_with_ts_opt( &self, key: K, ts: S, value: V, writeopts: &WriteOptions, ) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, S: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); let ts = ts.as_ref(); unsafe { ffi_try!(ffi::rocksdb_put_with_ts( self.inner.inner(), writeopts.inner, key.as_ptr() as *const c_char, key.len() as size_t, ts.as_ptr() as *const c_char, ts.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, )); Ok(()) } } /// Put with timestamp in a specific column family with WriteOptions. /// If "key" already exists, it will coexist with previous entry. /// `Get` with a timestamp ts specified in ReadOptions will return /// the most recent key/value whose timestamp is smaller than or equal to ts. /// Takes an additional argument `ts` as the timestamp. /// Note: the DB must be opened with user defined timestamp enabled. pub fn put_cf_with_ts_opt( &self, cf: &impl AsColumnFamilyRef, key: K, ts: S, value: V, writeopts: &WriteOptions, ) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, S: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); let ts = ts.as_ref(); unsafe { ffi_try!(ffi::rocksdb_put_cf_with_ts( self.inner.inner(), writeopts.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, ts.as_ptr() as *const c_char, ts.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, )); Ok(()) } } pub fn merge_opt(&self, key: K, value: V, writeopts: &WriteOptions) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi_try!(ffi::rocksdb_merge( self.inner.inner(), writeopts.inner, key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, )); Ok(()) } } pub fn merge_cf_opt( &self, cf: &impl AsColumnFamilyRef, key: K, value: V, writeopts: &WriteOptions, ) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi_try!(ffi::rocksdb_merge_cf( self.inner.inner(), writeopts.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, )); Ok(()) } } pub fn delete_opt>( &self, key: K, writeopts: &WriteOptions, ) -> Result<(), Error> { let key = key.as_ref(); unsafe { ffi_try!(ffi::rocksdb_delete( self.inner.inner(), writeopts.inner, key.as_ptr() as *const c_char, key.len() as size_t, )); Ok(()) } } pub fn delete_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, writeopts: &WriteOptions, ) -> Result<(), Error> { let key = key.as_ref(); unsafe { ffi_try!(ffi::rocksdb_delete_cf( self.inner.inner(), writeopts.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, )); Ok(()) } } /// Remove the database entry (if any) for "key" with WriteOptions. /// Takes an additional argument `ts` as the timestamp. /// Note: the DB must be opened with user defined timestamp enabled. pub fn delete_with_ts_opt( &self, key: K, ts: S, writeopts: &WriteOptions, ) -> Result<(), Error> where K: AsRef<[u8]>, S: AsRef<[u8]>, { let key = key.as_ref(); let ts = ts.as_ref(); unsafe { ffi_try!(ffi::rocksdb_delete_with_ts( self.inner.inner(), writeopts.inner, key.as_ptr() as *const c_char, key.len() as size_t, ts.as_ptr() as *const c_char, ts.len() as size_t, )); Ok(()) } } /// Delete with timestamp in a specific column family with WriteOptions. /// Takes an additional argument `ts` as the timestamp. /// Note: the DB must be opened with user defined timestamp enabled. pub fn delete_cf_with_ts_opt( &self, cf: &impl AsColumnFamilyRef, key: K, ts: S, writeopts: &WriteOptions, ) -> Result<(), Error> where K: AsRef<[u8]>, S: AsRef<[u8]>, { let key = key.as_ref(); let ts = ts.as_ref(); unsafe { ffi_try!(ffi::rocksdb_delete_cf_with_ts( self.inner.inner(), writeopts.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, ts.as_ptr() as *const c_char, ts.len() as size_t, )); Ok(()) } } pub fn put(&self, key: K, value: V) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { self.put_opt(key.as_ref(), value.as_ref(), &WriteOptions::default()) } pub fn put_cf(&self, cf: &impl AsColumnFamilyRef, key: K, value: V) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { self.put_cf_opt(cf, key.as_ref(), value.as_ref(), &WriteOptions::default()) } /// Set the database entry for "key" to "value". /// If "key" already exists, it will coexist with previous entry. /// `Get` with a timestamp ts specified in ReadOptions will return /// the most recent key/value whose timestamp is smaller than or equal to ts. /// Takes an additional argument `ts` as the timestamp. /// Note: the DB must be opened with user defined timestamp enabled. pub fn put_with_ts(&self, key: K, ts: S, value: V) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, S: AsRef<[u8]>, { self.put_with_ts_opt( key.as_ref(), ts.as_ref(), value.as_ref(), &WriteOptions::default(), ) } /// Put with timestamp in a specific column family. /// If "key" already exists, it will coexist with previous entry. /// `Get` with a timestamp ts specified in ReadOptions will return /// the most recent key/value whose timestamp is smaller than or equal to ts. /// Takes an additional argument `ts` as the timestamp. /// Note: the DB must be opened with user defined timestamp enabled. pub fn put_cf_with_ts( &self, cf: &impl AsColumnFamilyRef, key: K, ts: S, value: V, ) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, S: AsRef<[u8]>, { self.put_cf_with_ts_opt( cf, key.as_ref(), ts.as_ref(), value.as_ref(), &WriteOptions::default(), ) } pub fn merge(&self, key: K, value: V) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { self.merge_opt(key.as_ref(), value.as_ref(), &WriteOptions::default()) } pub fn merge_cf(&self, cf: &impl AsColumnFamilyRef, key: K, value: V) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { self.merge_cf_opt(cf, key.as_ref(), value.as_ref(), &WriteOptions::default()) } pub fn delete>(&self, key: K) -> Result<(), Error> { self.delete_opt(key.as_ref(), &WriteOptions::default()) } pub fn delete_cf>( &self, cf: &impl AsColumnFamilyRef, key: K, ) -> Result<(), Error> { self.delete_cf_opt(cf, key.as_ref(), &WriteOptions::default()) } /// Remove the database entry (if any) for "key". /// Takes an additional argument `ts` as the timestamp. /// Note: the DB must be opened with user defined timestamp enabled. pub fn delete_with_ts, S: AsRef<[u8]>>( &self, key: K, ts: S, ) -> Result<(), Error> { self.delete_with_ts_opt(key.as_ref(), ts.as_ref(), &WriteOptions::default()) } /// Delete with timestamp in a specific column family. /// Takes an additional argument `ts` as the timestamp. /// Note: the DB must be opened with user defined timestamp enabled. pub fn delete_cf_with_ts, S: AsRef<[u8]>>( &self, cf: &impl AsColumnFamilyRef, key: K, ts: S, ) -> Result<(), Error> { self.delete_cf_with_ts_opt(cf, key.as_ref(), ts.as_ref(), &WriteOptions::default()) } /// Runs a manual compaction on the Range of keys given. This is not likely to be needed for typical usage. pub fn compact_range, E: AsRef<[u8]>>(&self, start: Option, end: Option) { unsafe { let start = start.as_ref().map(AsRef::as_ref); let end = end.as_ref().map(AsRef::as_ref); ffi::rocksdb_compact_range( self.inner.inner(), opt_bytes_to_ptr(start), start.map_or(0, <[u8]>::len) as size_t, opt_bytes_to_ptr(end), end.map_or(0, <[u8]>::len) as size_t, ); } } /// Same as `compact_range` but with custom options. pub fn compact_range_opt, E: AsRef<[u8]>>( &self, start: Option, end: Option, opts: &CompactOptions, ) { unsafe { let start = start.as_ref().map(AsRef::as_ref); let end = end.as_ref().map(AsRef::as_ref); ffi::rocksdb_compact_range_opt( self.inner.inner(), opts.inner, opt_bytes_to_ptr(start), start.map_or(0, <[u8]>::len) as size_t, opt_bytes_to_ptr(end), end.map_or(0, <[u8]>::len) as size_t, ); } } /// Runs a manual compaction on the Range of keys given on the /// given column family. This is not likely to be needed for typical usage. pub fn compact_range_cf, E: AsRef<[u8]>>( &self, cf: &impl AsColumnFamilyRef, start: Option, end: Option, ) { unsafe { let start = start.as_ref().map(AsRef::as_ref); let end = end.as_ref().map(AsRef::as_ref); ffi::rocksdb_compact_range_cf( self.inner.inner(), cf.inner(), opt_bytes_to_ptr(start), start.map_or(0, <[u8]>::len) as size_t, opt_bytes_to_ptr(end), end.map_or(0, <[u8]>::len) as size_t, ); } } /// Same as `compact_range_cf` but with custom options. pub fn compact_range_cf_opt, E: AsRef<[u8]>>( &self, cf: &impl AsColumnFamilyRef, start: Option, end: Option, opts: &CompactOptions, ) { unsafe { let start = start.as_ref().map(AsRef::as_ref); let end = end.as_ref().map(AsRef::as_ref); ffi::rocksdb_compact_range_cf_opt( self.inner.inner(), cf.inner(), opts.inner, opt_bytes_to_ptr(start), start.map_or(0, <[u8]>::len) as size_t, opt_bytes_to_ptr(end), end.map_or(0, <[u8]>::len) as size_t, ); } } /// Wait for all flush and compactions jobs to finish. Jobs to wait include the /// unscheduled (queued, but not scheduled yet). /// /// NOTE: This may also never return if there's sufficient ongoing writes that /// keeps flush and compaction going without stopping. The user would have to /// cease all the writes to DB to make this eventually return in a stable /// state. The user may also use timeout option in WaitForCompactOptions to /// make this stop waiting and return when timeout expires. pub fn wait_for_compact(&self, opts: &WaitForCompactOptions) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_wait_for_compact( self.inner.inner(), opts.inner )); } Ok(()) } pub fn set_options(&self, opts: &[(&str, &str)]) -> Result<(), Error> { let copts = convert_options(opts)?; let cnames: Vec<*const c_char> = copts.iter().map(|opt| opt.0.as_ptr()).collect(); let cvalues: Vec<*const c_char> = copts.iter().map(|opt| opt.1.as_ptr()).collect(); let count = opts.len() as i32; unsafe { ffi_try!(ffi::rocksdb_set_options( self.inner.inner(), count, cnames.as_ptr(), cvalues.as_ptr(), )); } Ok(()) } pub fn set_options_cf( &self, cf: &impl AsColumnFamilyRef, opts: &[(&str, &str)], ) -> Result<(), Error> { let copts = convert_options(opts)?; let cnames: Vec<*const c_char> = copts.iter().map(|opt| opt.0.as_ptr()).collect(); let cvalues: Vec<*const c_char> = copts.iter().map(|opt| opt.1.as_ptr()).collect(); let count = opts.len() as i32; unsafe { ffi_try!(ffi::rocksdb_set_options_cf( self.inner.inner(), cf.inner(), count, cnames.as_ptr(), cvalues.as_ptr(), )); } Ok(()) } /// Implementation for property_value et al methods. /// /// `name` is the name of the property. It will be converted into a CString /// and passed to `get_property` as argument. `get_property` reads the /// specified property and either returns NULL or a pointer to a C allocated /// string; this method takes ownership of that string and will free it at /// the end. That string is parsed using `parse` callback which produces /// the returned result. fn property_value_impl( name: impl CStrLike, get_property: impl FnOnce(*const c_char) -> *mut c_char, parse: impl FnOnce(&str) -> Result, ) -> Result, Error> { let value = match name.bake() { Ok(prop_name) => get_property(prop_name.as_ptr()), Err(e) => { return Err(Error::new(format!( "Failed to convert property name to CString: {e}" ))); } }; if value.is_null() { return Ok(None); } let result = match unsafe { CStr::from_ptr(value) }.to_str() { Ok(s) => parse(s).map(|value| Some(value)), Err(e) => Err(Error::new(format!( "Failed to convert property value to string: {e}" ))), }; unsafe { ffi::rocksdb_free(value as *mut c_void); } result } /// Retrieves a RocksDB property by name. /// /// Full list of properties could be find /// [here](https://github.com/facebook/rocksdb/blob/08809f5e6cd9cc4bc3958dd4d59457ae78c76660/include/rocksdb/db.h#L428-L634). pub fn property_value(&self, name: impl CStrLike) -> Result, Error> { Self::property_value_impl( name, |prop_name| unsafe { ffi::rocksdb_property_value(self.inner.inner(), prop_name) }, |str_value| Ok(str_value.to_owned()), ) } /// Retrieves a RocksDB property by name, for a specific column family. /// /// Full list of properties could be find /// [here](https://github.com/facebook/rocksdb/blob/08809f5e6cd9cc4bc3958dd4d59457ae78c76660/include/rocksdb/db.h#L428-L634). pub fn property_value_cf( &self, cf: &impl AsColumnFamilyRef, name: impl CStrLike, ) -> Result, Error> { Self::property_value_impl( name, |prop_name| unsafe { ffi::rocksdb_property_value_cf(self.inner.inner(), cf.inner(), prop_name) }, |str_value| Ok(str_value.to_owned()), ) } fn parse_property_int_value(value: &str) -> Result { value.parse::().map_err(|err| { Error::new(format!( "Failed to convert property value {value} to int: {err}" )) }) } /// Retrieves a RocksDB property and casts it to an integer. /// /// Full list of properties that return int values could be find /// [here](https://github.com/facebook/rocksdb/blob/08809f5e6cd9cc4bc3958dd4d59457ae78c76660/include/rocksdb/db.h#L654-L689). pub fn property_int_value(&self, name: impl CStrLike) -> Result, Error> { Self::property_value_impl( name, |prop_name| unsafe { ffi::rocksdb_property_value(self.inner.inner(), prop_name) }, Self::parse_property_int_value, ) } /// Retrieves a RocksDB property for a specific column family and casts it to an integer. /// /// Full list of properties that return int values could be find /// [here](https://github.com/facebook/rocksdb/blob/08809f5e6cd9cc4bc3958dd4d59457ae78c76660/include/rocksdb/db.h#L654-L689). pub fn property_int_value_cf( &self, cf: &impl AsColumnFamilyRef, name: impl CStrLike, ) -> Result, Error> { Self::property_value_impl( name, |prop_name| unsafe { ffi::rocksdb_property_value_cf(self.inner.inner(), cf.inner(), prop_name) }, Self::parse_property_int_value, ) } /// The sequence number of the most recent transaction. pub fn latest_sequence_number(&self) -> u64 { unsafe { ffi::rocksdb_get_latest_sequence_number(self.inner.inner()) } } /// Iterate over batches of write operations since a given sequence. /// /// Produce an iterator that will provide the batches of write operations /// that have occurred since the given sequence (see /// `latest_sequence_number()`). Use the provided iterator to retrieve each /// (`u64`, `WriteBatch`) tuple, and then gather the individual puts and /// deletes using the `WriteBatch::iterate()` function. /// /// Calling `get_updates_since()` with a sequence number that is out of /// bounds will return an error. pub fn get_updates_since(&self, seq_number: u64) -> Result { unsafe { // rocksdb_wal_readoptions_t does not appear to have any functions // for creating and destroying it; fortunately we can pass a nullptr // here to get the default behavior let opts: *const ffi::rocksdb_wal_readoptions_t = ptr::null(); let iter = ffi_try!(ffi::rocksdb_get_updates_since( self.inner.inner(), seq_number, opts )); Ok(DBWALIterator { inner: iter, start_seq_number: seq_number, }) } } /// Tries to catch up with the primary by reading as much as possible from the /// log files. pub fn try_catch_up_with_primary(&self) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_try_catch_up_with_primary(self.inner.inner())); } Ok(()) } /// Loads a list of external SST files created with SstFileWriter into the DB with default opts pub fn ingest_external_file>(&self, paths: Vec

) -> Result<(), Error> { let opts = IngestExternalFileOptions::default(); self.ingest_external_file_opts(&opts, paths) } /// Loads a list of external SST files created with SstFileWriter into the DB pub fn ingest_external_file_opts>( &self, opts: &IngestExternalFileOptions, paths: Vec

, ) -> Result<(), Error> { let paths_v: Vec = paths.iter().map(to_cpath).collect::, _>>()?; let cpaths: Vec<_> = paths_v.iter().map(|path| path.as_ptr()).collect(); self.ingest_external_file_raw(opts, &paths_v, &cpaths) } /// Loads a list of external SST files created with SstFileWriter into the DB for given Column Family /// with default opts pub fn ingest_external_file_cf>( &self, cf: &impl AsColumnFamilyRef, paths: Vec

, ) -> Result<(), Error> { let opts = IngestExternalFileOptions::default(); self.ingest_external_file_cf_opts(cf, &opts, paths) } /// Loads a list of external SST files created with SstFileWriter into the DB for given Column Family pub fn ingest_external_file_cf_opts>( &self, cf: &impl AsColumnFamilyRef, opts: &IngestExternalFileOptions, paths: Vec

, ) -> Result<(), Error> { let paths_v: Vec = paths.iter().map(to_cpath).collect::, _>>()?; let cpaths: Vec<_> = paths_v.iter().map(|path| path.as_ptr()).collect(); self.ingest_external_file_raw_cf(cf, opts, &paths_v, &cpaths) } fn ingest_external_file_raw( &self, opts: &IngestExternalFileOptions, paths_v: &[CString], cpaths: &[*const c_char], ) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_ingest_external_file( self.inner.inner(), cpaths.as_ptr(), paths_v.len(), opts.inner.cast_const() )); Ok(()) } } fn ingest_external_file_raw_cf( &self, cf: &impl AsColumnFamilyRef, opts: &IngestExternalFileOptions, paths_v: &[CString], cpaths: &[*const c_char], ) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_ingest_external_file_cf( self.inner.inner(), cf.inner(), cpaths.as_ptr(), paths_v.len(), opts.inner.cast_const() )); Ok(()) } } /// Obtains the LSM-tree meta data of the default column family of the DB pub fn get_column_family_metadata(&self) -> ColumnFamilyMetaData { unsafe { let ptr = ffi::rocksdb_get_column_family_metadata(self.inner.inner()); let metadata = ColumnFamilyMetaData { size: ffi::rocksdb_column_family_metadata_get_size(ptr), name: from_cstr(ffi::rocksdb_column_family_metadata_get_name(ptr)), file_count: ffi::rocksdb_column_family_metadata_get_file_count(ptr), }; // destroy ffi::rocksdb_column_family_metadata_destroy(ptr); // return metadata } } /// Obtains the LSM-tree meta data of the specified column family of the DB pub fn get_column_family_metadata_cf( &self, cf: &impl AsColumnFamilyRef, ) -> ColumnFamilyMetaData { unsafe { let ptr = ffi::rocksdb_get_column_family_metadata_cf(self.inner.inner(), cf.inner()); let metadata = ColumnFamilyMetaData { size: ffi::rocksdb_column_family_metadata_get_size(ptr), name: from_cstr(ffi::rocksdb_column_family_metadata_get_name(ptr)), file_count: ffi::rocksdb_column_family_metadata_get_file_count(ptr), }; // destroy ffi::rocksdb_column_family_metadata_destroy(ptr); // return metadata } } /// Returns a list of all table files with their level, start key /// and end key pub fn live_files(&self) -> Result, Error> { unsafe { let files = ffi::rocksdb_livefiles(self.inner.inner()); if files.is_null() { Err(Error::new("Could not get live files".to_owned())) } else { let n = ffi::rocksdb_livefiles_count(files); let mut livefiles = Vec::with_capacity(n as usize); let mut key_size: usize = 0; for i in 0..n { let column_family_name = from_cstr(ffi::rocksdb_livefiles_column_family_name(files, i)); let name = from_cstr(ffi::rocksdb_livefiles_name(files, i)); let size = ffi::rocksdb_livefiles_size(files, i); let level = ffi::rocksdb_livefiles_level(files, i); // get smallest key inside file let smallest_key = ffi::rocksdb_livefiles_smallestkey(files, i, &mut key_size); let smallest_key = raw_data(smallest_key, key_size); // get largest key inside file let largest_key = ffi::rocksdb_livefiles_largestkey(files, i, &mut key_size); let largest_key = raw_data(largest_key, key_size); livefiles.push(LiveFile { column_family_name, name, size, level, start_key: smallest_key, end_key: largest_key, num_entries: ffi::rocksdb_livefiles_entries(files, i), num_deletions: ffi::rocksdb_livefiles_deletions(files, i), }); } // destroy livefiles metadata(s) ffi::rocksdb_livefiles_destroy(files); // return Ok(livefiles) } } } /// Delete sst files whose keys are entirely in the given range. /// /// Could leave some keys in the range which are in files which are not /// entirely in the range. /// /// Note: L0 files are left regardless of whether they're in the range. /// /// SnapshotWithThreadModes before the delete might not see the data in the given range. pub fn delete_file_in_range>(&self, from: K, to: K) -> Result<(), Error> { let from = from.as_ref(); let to = to.as_ref(); unsafe { ffi_try!(ffi::rocksdb_delete_file_in_range( self.inner.inner(), from.as_ptr() as *const c_char, from.len() as size_t, to.as_ptr() as *const c_char, to.len() as size_t, )); Ok(()) } } /// Same as `delete_file_in_range` but only for specific column family pub fn delete_file_in_range_cf>( &self, cf: &impl AsColumnFamilyRef, from: K, to: K, ) -> Result<(), Error> { let from = from.as_ref(); let to = to.as_ref(); unsafe { ffi_try!(ffi::rocksdb_delete_file_in_range_cf( self.inner.inner(), cf.inner(), from.as_ptr() as *const c_char, from.len() as size_t, to.as_ptr() as *const c_char, to.len() as size_t, )); Ok(()) } } /// Request stopping background work, if wait is true wait until it's done. pub fn cancel_all_background_work(&self, wait: bool) { unsafe { ffi::rocksdb_cancel_all_background_work(self.inner.inner(), c_uchar::from(wait)); } } fn drop_column_family( &self, cf_inner: *mut ffi::rocksdb_column_family_handle_t, cf: C, ) -> Result<(), Error> { unsafe { // first mark the column family as dropped ffi_try!(ffi::rocksdb_drop_column_family( self.inner.inner(), cf_inner )); } // then finally reclaim any resources (mem, files) by destroying the only single column // family handle by drop()-ing it drop(cf); Ok(()) } /// Increase the full_history_ts of column family. The new ts_low value should /// be newer than current full_history_ts value. /// If another thread updates full_history_ts_low concurrently to a higher /// timestamp than the requested ts_low, a try again error will be returned. pub fn increase_full_history_ts_low>( &self, cf: &impl AsColumnFamilyRef, ts: S, ) -> Result<(), Error> { let ts = ts.as_ref(); unsafe { ffi_try!(ffi::rocksdb_increase_full_history_ts_low( self.inner.inner(), cf.inner(), ts.as_ptr() as *const c_char, ts.len() as size_t, )); Ok(()) } } /// Get current full_history_ts value. pub fn get_full_history_ts_low(&self, cf: &impl AsColumnFamilyRef) -> Result, Error> { unsafe { let mut ts_lowlen = 0; let ts = ffi_try!(ffi::rocksdb_get_full_history_ts_low( self.inner.inner(), cf.inner(), &mut ts_lowlen, )); if ts.is_null() { Err(Error::new("Could not get full_history_ts_low".to_owned())) } else { let mut vec = vec![0; ts_lowlen]; ptr::copy_nonoverlapping(ts as *mut u8, vec.as_mut_ptr(), ts_lowlen); ffi::rocksdb_free(ts as *mut c_void); Ok(vec) } } } /// Returns the DB identity. This is typically ASCII bytes, but that is not guaranteed. pub fn get_db_identity(&self) -> Result, Error> { unsafe { let mut length: usize = 0; let identity_ptr = ffi::rocksdb_get_db_identity(self.inner.inner(), &mut length); let identity_vec = raw_data(identity_ptr, length); ffi::rocksdb_free(identity_ptr as *mut c_void); // In RocksDB: get_db_identity copies a std::string so it should not fail, but // the API allows it to be overridden, so it might identity_vec.ok_or_else(|| Error::new("get_db_identity returned NULL".to_string())) } } } impl DBCommon { /// Creates column family with given name and options pub fn create_cf>(&mut self, name: N, opts: &Options) -> Result<(), Error> { let inner = self.create_inner_cf_handle(name.as_ref(), opts)?; self.cfs .cfs .insert(name.as_ref().to_string(), ColumnFamily { inner }); Ok(()) } /// Drops the column family with the given name pub fn drop_cf(&mut self, name: &str) -> Result<(), Error> { if let Some(cf) = self.cfs.cfs.remove(name) { self.drop_column_family(cf.inner, cf) } else { Err(Error::new(format!("Invalid column family: {name}"))) } } /// Returns the underlying column family handle pub fn cf_handle(&self, name: &str) -> Option<&ColumnFamily> { self.cfs.cfs.get(name) } } impl DBCommon { /// Creates column family with given name and options pub fn create_cf>(&self, name: N, opts: &Options) -> Result<(), Error> { // Note that we acquire the cfs lock before inserting: otherwise we might race // another caller who observed the handle as missing. let mut cfs = self.cfs.cfs.write().unwrap(); let inner = self.create_inner_cf_handle(name.as_ref(), opts)?; cfs.insert( name.as_ref().to_string(), Arc::new(UnboundColumnFamily { inner }), ); Ok(()) } /// Drops the column family with the given name by internally locking the inner column /// family map. This avoids needing `&mut self` reference pub fn drop_cf(&self, name: &str) -> Result<(), Error> { if let Some(cf) = self.cfs.cfs.write().unwrap().remove(name) { self.drop_column_family(cf.inner, cf) } else { Err(Error::new(format!("Invalid column family: {name}"))) } } /// Returns the underlying column family handle pub fn cf_handle(&self, name: &str) -> Option> { self.cfs .cfs .read() .unwrap() .get(name) .cloned() .map(UnboundColumnFamily::bound_column_family) } } impl Drop for DBCommon { fn drop(&mut self) { self.cfs.drop_all_cfs_internal(); } } impl fmt::Debug for DBCommon { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "RocksDB {{ path: {:?} }}", self.path()) } } /// The metadata that describes a column family. #[derive(Debug, Clone)] pub struct ColumnFamilyMetaData { // The size of this column family in bytes, which is equal to the sum of // the file size of its "levels". pub size: u64, // The name of the column family. pub name: String, // The number of files in this column family. pub file_count: usize, } /// The metadata that describes a SST file #[derive(Debug, Clone)] pub struct LiveFile { /// Name of the column family the file belongs to pub column_family_name: String, /// Name of the file pub name: String, /// Size of the file pub size: usize, /// Level at which this file resides pub level: i32, /// Smallest user defined key in the file pub start_key: Option>, /// Largest user defined key in the file pub end_key: Option>, /// Number of entries/alive keys in the file pub num_entries: u64, /// Number of deletions/tomb key(s) in the file pub num_deletions: u64, } fn convert_options(opts: &[(&str, &str)]) -> Result, Error> { opts.iter() .map(|(name, value)| { let cname = match CString::new(name.as_bytes()) { Ok(cname) => cname, Err(e) => return Err(Error::new(format!("Invalid option name `{e}`"))), }; let cvalue = match CString::new(value.as_bytes()) { Ok(cvalue) => cvalue, Err(e) => return Err(Error::new(format!("Invalid option value: `{e}`"))), }; Ok((cname, cvalue)) }) .collect() } pub(crate) fn convert_values( values: Vec<*mut c_char>, values_sizes: Vec, errors: Vec<*mut c_char>, ) -> Vec>, Error>> { values .into_iter() .zip(values_sizes) .zip(errors) .map(|((v, s), e)| { if e.is_null() { let value = unsafe { crate::ffi_util::raw_data(v, s) }; unsafe { ffi::rocksdb_free(v as *mut c_void); } Ok(value) } else { Err(Error::new(crate::ffi_util::error_message(e))) } }) .collect() } rocksdb-0.23.0/src/db_iterator.rs000064400000000000000000000502721046102023000147760ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. use crate::{ db::{DBAccess, DB}, ffi, Error, ReadOptions, WriteBatch, }; use libc::{c_char, c_uchar, size_t}; use std::{marker::PhantomData, slice}; /// A type alias to keep compatibility. See [`DBRawIteratorWithThreadMode`] for details pub type DBRawIterator<'a> = DBRawIteratorWithThreadMode<'a, DB>; /// An iterator over a database or column family, with specifiable /// ranges and direction. /// /// This iterator is different to the standard ``DBIteratorWithThreadMode`` as it aims Into /// replicate the underlying iterator API within RocksDB itself. This should /// give access to more performance and flexibility but departs from the /// widely recognized Rust idioms. /// /// ``` /// use rocksdb::{DB, Options}; /// /// let tempdir = tempfile::Builder::new() /// .prefix("_path_for_rocksdb_storage4") /// .tempdir() /// .expect("Failed to create temporary path for the _path_for_rocksdb_storage4."); /// let path = tempdir.path(); /// { /// let db = DB::open_default(path).unwrap(); /// let mut iter = db.raw_iterator(); /// /// // Forwards iteration /// iter.seek_to_first(); /// while iter.valid() { /// println!("Saw {:?} {:?}", iter.key(), iter.value()); /// iter.next(); /// } /// /// // Reverse iteration /// iter.seek_to_last(); /// while iter.valid() { /// println!("Saw {:?} {:?}", iter.key(), iter.value()); /// iter.prev(); /// } /// /// // Seeking /// iter.seek(b"my key"); /// while iter.valid() { /// println!("Saw {:?} {:?}", iter.key(), iter.value()); /// iter.next(); /// } /// /// // Reverse iteration from key /// // Note, use seek_for_prev when reversing because if this key doesn't exist, /// // this will make the iterator start from the previous key rather than the next. /// iter.seek_for_prev(b"my key"); /// while iter.valid() { /// println!("Saw {:?} {:?}", iter.key(), iter.value()); /// iter.prev(); /// } /// } /// let _ = DB::destroy(&Options::default(), path); /// ``` pub struct DBRawIteratorWithThreadMode<'a, D: DBAccess> { inner: std::ptr::NonNull, /// When iterate_lower_bound or iterate_upper_bound are set, the inner /// C iterator keeps a pointer to the upper bound inside `_readopts`. /// Storing this makes sure the upper bound is always alive when the /// iterator is being used. /// /// And yes, we need to store the entire ReadOptions structure since C++ /// ReadOptions keep reference to C rocksdb_readoptions_t wrapper which /// point to vectors we own. See issue #660. _readopts: ReadOptions, db: PhantomData<&'a D>, } impl<'a, D: DBAccess> DBRawIteratorWithThreadMode<'a, D> { pub(crate) fn new(db: &D, readopts: ReadOptions) -> Self { let inner = unsafe { db.create_iterator(&readopts) }; Self::from_inner(inner, readopts) } pub(crate) fn new_cf( db: &'a D, cf_handle: *mut ffi::rocksdb_column_family_handle_t, readopts: ReadOptions, ) -> Self { let inner = unsafe { db.create_iterator_cf(cf_handle, &readopts) }; Self::from_inner(inner, readopts) } fn from_inner(inner: *mut ffi::rocksdb_iterator_t, readopts: ReadOptions) -> Self { // This unwrap will never fail since rocksdb_create_iterator and // rocksdb_create_iterator_cf functions always return non-null. They // use new and deference the result so any nulls would end up with SIGSEGV // there and we would have a bigger issue. let inner = std::ptr::NonNull::new(inner).unwrap(); Self { inner, _readopts: readopts, db: PhantomData, } } /// Returns `true` if the iterator is valid. An iterator is invalidated when /// it reaches the end of its defined range, or when it encounters an error. /// /// To check whether the iterator encountered an error after `valid` has /// returned `false`, use the [`status`](DBRawIteratorWithThreadMode::status) method. `status` will never /// return an error when `valid` is `true`. pub fn valid(&self) -> bool { unsafe { ffi::rocksdb_iter_valid(self.inner.as_ptr()) != 0 } } /// Returns an error `Result` if the iterator has encountered an error /// during operation. When an error is encountered, the iterator is /// invalidated and [`valid`](DBRawIteratorWithThreadMode::valid) will return `false` when called. /// /// Performing a seek will discard the current status. pub fn status(&self) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_iter_get_error(self.inner.as_ptr())); } Ok(()) } /// Seeks to the first key in the database. /// /// # Examples /// /// ```rust /// use rocksdb::{DB, Options}; /// /// let tempdir = tempfile::Builder::new() /// .prefix("_path_for_rocksdb_storage5") /// .tempdir() /// .expect("Failed to create temporary path for the _path_for_rocksdb_storage5."); /// let path = tempdir.path(); /// { /// let db = DB::open_default(path).unwrap(); /// let mut iter = db.raw_iterator(); /// /// // Iterate all keys from the start in lexicographic order /// iter.seek_to_first(); /// /// while iter.valid() { /// println!("{:?} {:?}", iter.key(), iter.value()); /// iter.next(); /// } /// /// // Read just the first key /// iter.seek_to_first(); /// /// if iter.valid() { /// println!("{:?} {:?}", iter.key(), iter.value()); /// } else { /// // There are no keys in the database /// } /// } /// let _ = DB::destroy(&Options::default(), path); /// ``` pub fn seek_to_first(&mut self) { unsafe { ffi::rocksdb_iter_seek_to_first(self.inner.as_ptr()); } } /// Seeks to the last key in the database. /// /// # Examples /// /// ```rust /// use rocksdb::{DB, Options}; /// /// let tempdir = tempfile::Builder::new() /// .prefix("_path_for_rocksdb_storage6") /// .tempdir() /// .expect("Failed to create temporary path for the _path_for_rocksdb_storage6."); /// let path = tempdir.path(); /// { /// let db = DB::open_default(path).unwrap(); /// let mut iter = db.raw_iterator(); /// /// // Iterate all keys from the end in reverse lexicographic order /// iter.seek_to_last(); /// /// while iter.valid() { /// println!("{:?} {:?}", iter.key(), iter.value()); /// iter.prev(); /// } /// /// // Read just the last key /// iter.seek_to_last(); /// /// if iter.valid() { /// println!("{:?} {:?}", iter.key(), iter.value()); /// } else { /// // There are no keys in the database /// } /// } /// let _ = DB::destroy(&Options::default(), path); /// ``` pub fn seek_to_last(&mut self) { unsafe { ffi::rocksdb_iter_seek_to_last(self.inner.as_ptr()); } } /// Seeks to the specified key or the first key that lexicographically follows it. /// /// This method will attempt to seek to the specified key. If that key does not exist, it will /// find and seek to the key that lexicographically follows it instead. /// /// # Examples /// /// ```rust /// use rocksdb::{DB, Options}; /// /// let tempdir = tempfile::Builder::new() /// .prefix("_path_for_rocksdb_storage7") /// .tempdir() /// .expect("Failed to create temporary path for the _path_for_rocksdb_storage7."); /// let path = tempdir.path(); /// { /// let db = DB::open_default(path).unwrap(); /// let mut iter = db.raw_iterator(); /// /// // Read the first key that starts with 'a' /// iter.seek(b"a"); /// /// if iter.valid() { /// println!("{:?} {:?}", iter.key(), iter.value()); /// } else { /// // There are no keys in the database /// } /// } /// let _ = DB::destroy(&Options::default(), path); /// ``` pub fn seek>(&mut self, key: K) { let key = key.as_ref(); unsafe { ffi::rocksdb_iter_seek( self.inner.as_ptr(), key.as_ptr() as *const c_char, key.len() as size_t, ); } } /// Seeks to the specified key, or the first key that lexicographically precedes it. /// /// Like ``.seek()`` this method will attempt to seek to the specified key. /// The difference with ``.seek()`` is that if the specified key do not exist, this method will /// seek to key that lexicographically precedes it instead. /// /// # Examples /// /// ```rust /// use rocksdb::{DB, Options}; /// /// let tempdir = tempfile::Builder::new() /// .prefix("_path_for_rocksdb_storage8") /// .tempdir() /// .expect("Failed to create temporary path for the _path_for_rocksdb_storage8."); /// let path = tempdir.path(); /// { /// let db = DB::open_default(path).unwrap(); /// let mut iter = db.raw_iterator(); /// /// // Read the last key that starts with 'a' /// iter.seek_for_prev(b"b"); /// /// if iter.valid() { /// println!("{:?} {:?}", iter.key(), iter.value()); /// } else { /// // There are no keys in the database /// } /// } /// let _ = DB::destroy(&Options::default(), path); /// ``` pub fn seek_for_prev>(&mut self, key: K) { let key = key.as_ref(); unsafe { ffi::rocksdb_iter_seek_for_prev( self.inner.as_ptr(), key.as_ptr() as *const c_char, key.len() as size_t, ); } } /// Seeks to the next key. pub fn next(&mut self) { if self.valid() { unsafe { ffi::rocksdb_iter_next(self.inner.as_ptr()); } } } /// Seeks to the previous key. pub fn prev(&mut self) { if self.valid() { unsafe { ffi::rocksdb_iter_prev(self.inner.as_ptr()); } } } /// Returns a slice of the current key. pub fn key(&self) -> Option<&[u8]> { if self.valid() { Some(self.key_impl()) } else { None } } /// Returns a slice of the current value. pub fn value(&self) -> Option<&[u8]> { if self.valid() { Some(self.value_impl()) } else { None } } /// Returns pair with slice of the current key and current value. pub fn item(&self) -> Option<(&[u8], &[u8])> { if self.valid() { Some((self.key_impl(), self.value_impl())) } else { None } } /// Returns a slice of the current key; assumes the iterator is valid. fn key_impl(&self) -> &[u8] { // Safety Note: This is safe as all methods that may invalidate the buffer returned // take `&mut self`, so borrow checker will prevent use of buffer after seek. unsafe { let mut key_len: size_t = 0; let key_len_ptr: *mut size_t = &mut key_len; let key_ptr = ffi::rocksdb_iter_key(self.inner.as_ptr(), key_len_ptr); slice::from_raw_parts(key_ptr as *const c_uchar, key_len) } } /// Returns a slice of the current value; assumes the iterator is valid. fn value_impl(&self) -> &[u8] { // Safety Note: This is safe as all methods that may invalidate the buffer returned // take `&mut self`, so borrow checker will prevent use of buffer after seek. unsafe { let mut val_len: size_t = 0; let val_len_ptr: *mut size_t = &mut val_len; let val_ptr = ffi::rocksdb_iter_value(self.inner.as_ptr(), val_len_ptr); slice::from_raw_parts(val_ptr as *const c_uchar, val_len) } } } impl<'a, D: DBAccess> Drop for DBRawIteratorWithThreadMode<'a, D> { fn drop(&mut self) { unsafe { ffi::rocksdb_iter_destroy(self.inner.as_ptr()); } } } unsafe impl<'a, D: DBAccess> Send for DBRawIteratorWithThreadMode<'a, D> {} unsafe impl<'a, D: DBAccess> Sync for DBRawIteratorWithThreadMode<'a, D> {} /// A type alias to keep compatibility. See [`DBIteratorWithThreadMode`] for details pub type DBIterator<'a> = DBIteratorWithThreadMode<'a, DB>; /// An iterator over a database or column family, with specifiable /// ranges and direction. /// /// ``` /// use rocksdb::{DB, Direction, IteratorMode, Options}; /// /// let tempdir = tempfile::Builder::new() /// .prefix("_path_for_rocksdb_storage2") /// .tempdir() /// .expect("Failed to create temporary path for the _path_for_rocksdb_storage2."); /// let path = tempdir.path(); /// { /// let db = DB::open_default(path).unwrap(); /// let mut iter = db.iterator(IteratorMode::Start); // Always iterates forward /// for item in iter { /// let (key, value) = item.unwrap(); /// println!("Saw {:?} {:?}", key, value); /// } /// iter = db.iterator(IteratorMode::End); // Always iterates backward /// for item in iter { /// let (key, value) = item.unwrap(); /// println!("Saw {:?} {:?}", key, value); /// } /// iter = db.iterator(IteratorMode::From(b"my key", Direction::Forward)); // From a key in Direction::{forward,reverse} /// for item in iter { /// let (key, value) = item.unwrap(); /// println!("Saw {:?} {:?}", key, value); /// } /// /// // You can seek with an existing Iterator instance, too /// iter = db.iterator(IteratorMode::Start); /// iter.set_mode(IteratorMode::From(b"another key", Direction::Reverse)); /// for item in iter { /// let (key, value) = item.unwrap(); /// println!("Saw {:?} {:?}", key, value); /// } /// } /// let _ = DB::destroy(&Options::default(), path); /// ``` pub struct DBIteratorWithThreadMode<'a, D: DBAccess> { raw: DBRawIteratorWithThreadMode<'a, D>, direction: Direction, done: bool, } #[derive(Copy, Clone)] pub enum Direction { Forward, Reverse, } pub type KVBytes = (Box<[u8]>, Box<[u8]>); #[derive(Copy, Clone)] pub enum IteratorMode<'a> { Start, End, From(&'a [u8], Direction), } impl<'a, D: DBAccess> DBIteratorWithThreadMode<'a, D> { pub(crate) fn new(db: &D, readopts: ReadOptions, mode: IteratorMode) -> Self { Self::from_raw(DBRawIteratorWithThreadMode::new(db, readopts), mode) } pub(crate) fn new_cf( db: &'a D, cf_handle: *mut ffi::rocksdb_column_family_handle_t, readopts: ReadOptions, mode: IteratorMode, ) -> Self { Self::from_raw( DBRawIteratorWithThreadMode::new_cf(db, cf_handle, readopts), mode, ) } fn from_raw(raw: DBRawIteratorWithThreadMode<'a, D>, mode: IteratorMode) -> Self { let mut rv = DBIteratorWithThreadMode { raw, direction: Direction::Forward, // blown away by set_mode() done: false, }; rv.set_mode(mode); rv } pub fn set_mode(&mut self, mode: IteratorMode) { self.done = false; self.direction = match mode { IteratorMode::Start => { self.raw.seek_to_first(); Direction::Forward } IteratorMode::End => { self.raw.seek_to_last(); Direction::Reverse } IteratorMode::From(key, Direction::Forward) => { self.raw.seek(key); Direction::Forward } IteratorMode::From(key, Direction::Reverse) => { self.raw.seek_for_prev(key); Direction::Reverse } }; } } impl<'a, D: DBAccess> Iterator for DBIteratorWithThreadMode<'a, D> { type Item = Result; fn next(&mut self) -> Option> { if self.done { None } else if let Some((key, value)) = self.raw.item() { let item = (Box::from(key), Box::from(value)); match self.direction { Direction::Forward => self.raw.next(), Direction::Reverse => self.raw.prev(), } Some(Ok(item)) } else { self.done = true; self.raw.status().err().map(Result::Err) } } } impl<'a, D: DBAccess> std::iter::FusedIterator for DBIteratorWithThreadMode<'a, D> {} impl<'a, D: DBAccess> Into> for DBIteratorWithThreadMode<'a, D> { fn into(self) -> DBRawIteratorWithThreadMode<'a, D> { self.raw } } /// Iterates the batches of writes since a given sequence number. /// /// `DBWALIterator` is returned by `DB::get_updates_since()` and will return the /// batches of write operations that have occurred since a given sequence number /// (see `DB::latest_sequence_number()`). This iterator cannot be constructed by /// the application. /// /// The iterator item type is a tuple of (`u64`, `WriteBatch`) where the first /// value is the sequence number of the associated write batch. /// pub struct DBWALIterator { pub(crate) inner: *mut ffi::rocksdb_wal_iterator_t, pub(crate) start_seq_number: u64, } impl DBWALIterator { /// Returns `true` if the iterator is valid. An iterator is invalidated when /// it reaches the end of its defined range, or when it encounters an error. /// /// To check whether the iterator encountered an error after `valid` has /// returned `false`, use the [`status`](DBWALIterator::status) method. /// `status` will never return an error when `valid` is `true`. pub fn valid(&self) -> bool { unsafe { ffi::rocksdb_wal_iter_valid(self.inner) != 0 } } /// Returns an error `Result` if the iterator has encountered an error /// during operation. When an error is encountered, the iterator is /// invalidated and [`valid`](DBWALIterator::valid) will return `false` when /// called. pub fn status(&self) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_wal_iter_status(self.inner)); } Ok(()) } } impl Iterator for DBWALIterator { type Item = Result<(u64, WriteBatch), Error>; fn next(&mut self) -> Option { if !self.valid() { return None; } let mut seq: u64 = 0; let mut batch = WriteBatch { inner: unsafe { ffi::rocksdb_wal_iter_get_batch(self.inner, &mut seq) }, }; // if the initial sequence number is what was requested we skip it to // only provide changes *after* it while seq <= self.start_seq_number { unsafe { ffi::rocksdb_wal_iter_next(self.inner); } if !self.valid() { return None; } // this drops which in turn frees the skipped batch batch = WriteBatch { inner: unsafe { ffi::rocksdb_wal_iter_get_batch(self.inner, &mut seq) }, }; } if !self.valid() { return self.status().err().map(Result::Err); } // Seek to the next write batch. // Note that WriteBatches live independently of the WAL iterator so this is safe to do unsafe { ffi::rocksdb_wal_iter_next(self.inner); } Some(Ok((seq, batch))) } } impl Drop for DBWALIterator { fn drop(&mut self) { unsafe { ffi::rocksdb_wal_iter_destroy(self.inner); } } } rocksdb-0.23.0/src/db_options.rs000064400000000000000000005155001046102023000146400ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. use std::ffi::CStr; use std::path::Path; use std::ptr::{null_mut, NonNull}; use std::slice; use std::sync::Arc; use libc::{self, c_char, c_double, c_int, c_uchar, c_uint, c_void, size_t}; use crate::column_family::ColumnFamilyTtl; use crate::statistics::{Histogram, HistogramData, StatsLevel}; use crate::{ compaction_filter::{self, CompactionFilterCallback, CompactionFilterFn}, compaction_filter_factory::{self, CompactionFilterFactory}, comparator::{ ComparatorCallback, ComparatorWithTsCallback, CompareFn, CompareTsFn, CompareWithoutTsFn, }, db::DBAccess, env::Env, ffi, ffi_util::{from_cstr, to_cpath, CStrLike}, merge_operator::{ self, full_merge_callback, partial_merge_callback, MergeFn, MergeOperatorCallback, }, slice_transform::SliceTransform, statistics::Ticker, ColumnFamilyDescriptor, Error, SnapshotWithThreadMode, }; pub(crate) struct WriteBufferManagerWrapper { pub(crate) inner: NonNull, } impl Drop for WriteBufferManagerWrapper { fn drop(&mut self) { unsafe { ffi::rocksdb_write_buffer_manager_destroy(self.inner.as_ptr()); } } } #[derive(Clone)] pub struct WriteBufferManager(pub(crate) Arc); impl WriteBufferManager { /// /// Write buffer manager helps users control the total memory used by memtables across multiple column families and/or DB instances. /// Users can enable this control by 2 ways: /// /// 1- Limit the total memtable usage across multiple column families and DBs under a threshold. /// 2- Cost the memtable memory usage to block cache so that memory of RocksDB can be capped by the single limit. /// The usage of a write buffer manager is similar to rate_limiter and sst_file_manager. /// Users can create one write buffer manager object and pass it to all the options of column families or DBs whose memtable size they want to be controlled by this object. /// /// A memory limit is given when creating the write buffer manager object. RocksDB will try to limit the total memory to under this limit. /// /// a flush will be triggered on one column family of the DB you are inserting to, /// /// If mutable memtable size exceeds about 90% of the limit, /// If the total memory is over the limit, more aggressive flush may also be triggered only if the mutable memtable size also exceeds 50% of the limit. /// Both checks are needed because if already more than half memory is being flushed, triggering more flush may not help. /// /// The total memory is counted as total memory allocated in the arena, even if some of that may not yet be used by memtable. /// /// buffer_size: the memory limit in bytes. /// allow_stall: If set true, it will enable stalling of all writers when memory usage exceeds buffer_size (soft limit). /// It will wait for flush to complete and memory usage to drop down pub fn new_write_buffer_manager(buffer_size: size_t, allow_stall: bool) -> Self { let inner = NonNull::new(unsafe { ffi::rocksdb_write_buffer_manager_create(buffer_size, allow_stall) }) .unwrap(); WriteBufferManager(Arc::new(WriteBufferManagerWrapper { inner })) } /// Users can set up RocksDB to cost memory used by memtables to block cache. /// This can happen no matter whether you enable memtable memory limit or not. /// This option is added to manage memory (memtables + block cache) under a single limit. /// /// buffer_size: the memory limit in bytes. /// allow_stall: If set true, it will enable stalling of all writers when memory usage exceeds buffer_size (soft limit). /// It will wait for flush to complete and memory usage to drop down /// cache: the block cache instance pub fn new_write_buffer_manager_with_cache( buffer_size: size_t, allow_stall: bool, cache: Cache, ) -> Self { let inner = NonNull::new(unsafe { ffi::rocksdb_write_buffer_manager_create_with_cache( buffer_size, cache.0.inner.as_ptr(), allow_stall, ) }) .unwrap(); WriteBufferManager(Arc::new(WriteBufferManagerWrapper { inner })) } /// Returns the WriteBufferManager memory usage in bytes. pub fn get_usage(&self) -> usize { unsafe { ffi::rocksdb_write_buffer_manager_memory_usage(self.0.inner.as_ptr()) } } /// Returns the current buffer size in bytes. pub fn get_buffer_size(&self) -> usize { unsafe { ffi::rocksdb_write_buffer_manager_buffer_size(self.0.inner.as_ptr()) } } /// Set the buffer size in bytes. pub fn set_buffer_size(&self, new_size: usize) { unsafe { ffi::rocksdb_write_buffer_manager_set_buffer_size(self.0.inner.as_ptr(), new_size); } } /// Returns if WriteBufferManager is enabled. pub fn enabled(&self) -> bool { unsafe { ffi::rocksdb_write_buffer_manager_enabled(self.0.inner.as_ptr()) } } /// set the allow_stall flag. pub fn set_allow_stall(&self, allow_stall: bool) { unsafe { ffi::rocksdb_write_buffer_manager_set_allow_stall(self.0.inner.as_ptr(), allow_stall); } } } pub(crate) struct CacheWrapper { pub(crate) inner: NonNull, } impl Drop for CacheWrapper { fn drop(&mut self) { unsafe { ffi::rocksdb_cache_destroy(self.inner.as_ptr()); } } } #[derive(Clone)] pub struct Cache(pub(crate) Arc); impl Cache { /// Creates an LRU cache with capacity in bytes. pub fn new_lru_cache(capacity: size_t) -> Cache { let inner = NonNull::new(unsafe { ffi::rocksdb_cache_create_lru(capacity) }).unwrap(); Cache(Arc::new(CacheWrapper { inner })) } /// Creates an LRU cache with custom options. pub fn new_lru_cache_opts(opts: &LruCacheOptions) -> Cache { let inner = NonNull::new(unsafe { ffi::rocksdb_cache_create_lru_opts(opts.inner) }).unwrap(); Cache(Arc::new(CacheWrapper { inner })) } /// Creates a HyperClockCache with capacity in bytes. /// /// `estimated_entry_charge` is an important tuning parameter. The optimal /// choice at any given time is /// `(cache.get_usage() - 64 * cache.get_table_address_count()) / /// cache.get_occupancy_count()`, or approximately `cache.get_usage() / /// cache.get_occupancy_count()`. /// /// However, the value cannot be changed dynamically, so as the cache /// composition changes at runtime, the following tradeoffs apply: /// /// * If the estimate is substantially too high (e.g., 25% higher), /// the cache may have to evict entries to prevent load factors that /// would dramatically affect lookup times. /// * If the estimate is substantially too low (e.g., less than half), /// then meta data space overhead is substantially higher. /// /// The latter is generally preferable, and picking the larger of /// block size and meta data block size is a reasonable choice that /// errs towards this side. pub fn new_hyper_clock_cache(capacity: size_t, estimated_entry_charge: size_t) -> Cache { Cache(Arc::new(CacheWrapper { inner: NonNull::new(unsafe { ffi::rocksdb_cache_create_hyper_clock(capacity, estimated_entry_charge) }) .unwrap(), })) } /// Returns the cache memory usage in bytes. pub fn get_usage(&self) -> usize { unsafe { ffi::rocksdb_cache_get_usage(self.0.inner.as_ptr()) } } /// Returns the pinned memory usage in bytes. pub fn get_pinned_usage(&self) -> usize { unsafe { ffi::rocksdb_cache_get_pinned_usage(self.0.inner.as_ptr()) } } /// Sets cache capacity in bytes. pub fn set_capacity(&mut self, capacity: size_t) { unsafe { ffi::rocksdb_cache_set_capacity(self.0.inner.as_ptr(), capacity); } } } #[derive(Default)] pub(crate) struct OptionsMustOutliveDB { env: Option, row_cache: Option, blob_cache: Option, block_based: Option, write_buffer_manager: Option, } impl OptionsMustOutliveDB { pub(crate) fn clone(&self) -> Self { Self { env: self.env.clone(), row_cache: self.row_cache.clone(), blob_cache: self.blob_cache.clone(), block_based: self .block_based .as_ref() .map(BlockBasedOptionsMustOutliveDB::clone), write_buffer_manager: self.write_buffer_manager.clone(), } } } #[derive(Default)] struct BlockBasedOptionsMustOutliveDB { block_cache: Option, } impl BlockBasedOptionsMustOutliveDB { fn clone(&self) -> Self { Self { block_cache: self.block_cache.clone(), } } } /// Database-wide options around performance and behavior. /// /// Please read the official tuning [guide](https://github.com/facebook/rocksdb/wiki/RocksDB-Tuning-Guide) /// and most importantly, measure performance under realistic workloads with realistic hardware. /// /// # Examples /// /// ``` /// use rocksdb::{Options, DB}; /// use rocksdb::DBCompactionStyle; /// /// fn badly_tuned_for_somebody_elses_disk() -> DB { /// let path = "path/for/rocksdb/storageX"; /// let mut opts = Options::default(); /// opts.create_if_missing(true); /// opts.set_max_open_files(10000); /// opts.set_use_fsync(false); /// opts.set_bytes_per_sync(8388608); /// opts.optimize_for_point_lookup(1024); /// opts.set_table_cache_num_shard_bits(6); /// opts.set_max_write_buffer_number(32); /// opts.set_write_buffer_size(536870912); /// opts.set_target_file_size_base(1073741824); /// opts.set_min_write_buffer_number_to_merge(4); /// opts.set_level_zero_stop_writes_trigger(2000); /// opts.set_level_zero_slowdown_writes_trigger(0); /// opts.set_compaction_style(DBCompactionStyle::Universal); /// opts.set_disable_auto_compactions(true); /// /// DB::open(&opts, path).unwrap() /// } /// ``` pub struct Options { pub(crate) inner: *mut ffi::rocksdb_options_t, pub(crate) outlive: OptionsMustOutliveDB, } /// Optionally disable WAL or sync for this write. /// /// # Examples /// /// Making an unsafe write of a batch: /// /// ``` /// use rocksdb::{DB, Options, WriteBatch, WriteOptions}; /// /// let tempdir = tempfile::Builder::new() /// .prefix("_path_for_rocksdb_storageY1") /// .tempdir() /// .expect("Failed to create temporary path for the _path_for_rocksdb_storageY1"); /// let path = tempdir.path(); /// { /// let db = DB::open_default(path).unwrap(); /// let mut batch = WriteBatch::default(); /// batch.put(b"my key", b"my value"); /// batch.put(b"key2", b"value2"); /// batch.put(b"key3", b"value3"); /// /// let mut write_options = WriteOptions::default(); /// write_options.set_sync(false); /// write_options.disable_wal(true); /// /// db.write_opt(batch, &write_options); /// } /// let _ = DB::destroy(&Options::default(), path); /// ``` pub struct WriteOptions { pub(crate) inner: *mut ffi::rocksdb_writeoptions_t, } pub struct LruCacheOptions { pub(crate) inner: *mut ffi::rocksdb_lru_cache_options_t, } /// Optionally wait for the memtable flush to be performed. /// /// # Examples /// /// Manually flushing the memtable: /// /// ``` /// use rocksdb::{DB, Options, FlushOptions}; /// /// let tempdir = tempfile::Builder::new() /// .prefix("_path_for_rocksdb_storageY2") /// .tempdir() /// .expect("Failed to create temporary path for the _path_for_rocksdb_storageY2"); /// let path = tempdir.path(); /// { /// let db = DB::open_default(path).unwrap(); /// /// let mut flush_options = FlushOptions::default(); /// flush_options.set_wait(true); /// /// db.flush_opt(&flush_options); /// } /// let _ = DB::destroy(&Options::default(), path); /// ``` pub struct FlushOptions { pub(crate) inner: *mut ffi::rocksdb_flushoptions_t, } /// For configuring block-based file storage. pub struct BlockBasedOptions { pub(crate) inner: *mut ffi::rocksdb_block_based_table_options_t, outlive: BlockBasedOptionsMustOutliveDB, } pub struct ReadOptions { pub(crate) inner: *mut ffi::rocksdb_readoptions_t, // The `ReadOptions` owns a copy of the timestamp and iteration bounds. // This is necessary to ensure the pointers we pass over the FFI live as // long as the `ReadOptions`. This way, when performing the read operation, // the pointers are guaranteed to be valid. timestamp: Option>, iter_start_ts: Option>, iterate_upper_bound: Option>, iterate_lower_bound: Option>, } /// Configuration of cuckoo-based storage. pub struct CuckooTableOptions { pub(crate) inner: *mut ffi::rocksdb_cuckoo_table_options_t, } /// For configuring external files ingestion. /// /// # Examples /// /// Move files instead of copying them: /// /// ``` /// use rocksdb::{DB, IngestExternalFileOptions, SstFileWriter, Options}; /// /// let writer_opts = Options::default(); /// let mut writer = SstFileWriter::create(&writer_opts); /// let tempdir = tempfile::Builder::new() /// .tempdir() /// .expect("Failed to create temporary folder for the _path_for_sst_file"); /// let path1 = tempdir.path().join("_path_for_sst_file"); /// writer.open(path1.clone()).unwrap(); /// writer.put(b"k1", b"v1").unwrap(); /// writer.finish().unwrap(); /// /// let tempdir2 = tempfile::Builder::new() /// .prefix("_path_for_rocksdb_storageY3") /// .tempdir() /// .expect("Failed to create temporary path for the _path_for_rocksdb_storageY3"); /// let path2 = tempdir2.path(); /// { /// let db = DB::open_default(&path2).unwrap(); /// let mut ingest_opts = IngestExternalFileOptions::default(); /// ingest_opts.set_move_files(true); /// db.ingest_external_file_opts(&ingest_opts, vec![path1]).unwrap(); /// } /// let _ = DB::destroy(&Options::default(), path2); /// ``` pub struct IngestExternalFileOptions { pub(crate) inner: *mut ffi::rocksdb_ingestexternalfileoptions_t, } // Safety note: auto-implementing Send on most db-related types is prevented by the inner FFI // pointer. In most cases, however, this pointer is Send-safe because it is never aliased and // rocksdb internally does not rely on thread-local information for its user-exposed types. unsafe impl Send for Options {} unsafe impl Send for WriteOptions {} unsafe impl Send for LruCacheOptions {} unsafe impl Send for FlushOptions {} unsafe impl Send for BlockBasedOptions {} unsafe impl Send for CuckooTableOptions {} unsafe impl Send for ReadOptions {} unsafe impl Send for IngestExternalFileOptions {} unsafe impl Send for CacheWrapper {} unsafe impl Send for CompactOptions {} unsafe impl Send for WriteBufferManagerWrapper {} // Sync is similarly safe for many types because they do not expose interior mutability, and their // use within the rocksdb library is generally behind a const reference unsafe impl Sync for Options {} unsafe impl Sync for WriteOptions {} unsafe impl Sync for LruCacheOptions {} unsafe impl Sync for FlushOptions {} unsafe impl Sync for BlockBasedOptions {} unsafe impl Sync for CuckooTableOptions {} unsafe impl Sync for ReadOptions {} unsafe impl Sync for IngestExternalFileOptions {} unsafe impl Sync for CacheWrapper {} unsafe impl Sync for CompactOptions {} unsafe impl Sync for WriteBufferManagerWrapper {} impl Drop for Options { fn drop(&mut self) { unsafe { ffi::rocksdb_options_destroy(self.inner); } } } impl Clone for Options { fn clone(&self) -> Self { let inner = unsafe { ffi::rocksdb_options_create_copy(self.inner) }; assert!(!inner.is_null(), "Could not copy RocksDB options"); Self { inner, outlive: self.outlive.clone(), } } } impl Drop for BlockBasedOptions { fn drop(&mut self) { unsafe { ffi::rocksdb_block_based_options_destroy(self.inner); } } } impl Drop for CuckooTableOptions { fn drop(&mut self) { unsafe { ffi::rocksdb_cuckoo_options_destroy(self.inner); } } } impl Drop for FlushOptions { fn drop(&mut self) { unsafe { ffi::rocksdb_flushoptions_destroy(self.inner); } } } impl Drop for WriteOptions { fn drop(&mut self) { unsafe { ffi::rocksdb_writeoptions_destroy(self.inner); } } } impl Drop for LruCacheOptions { fn drop(&mut self) { unsafe { ffi::rocksdb_lru_cache_options_destroy(self.inner); } } } impl Drop for ReadOptions { fn drop(&mut self) { unsafe { ffi::rocksdb_readoptions_destroy(self.inner); } } } impl Drop for IngestExternalFileOptions { fn drop(&mut self) { unsafe { ffi::rocksdb_ingestexternalfileoptions_destroy(self.inner); } } } impl BlockBasedOptions { /// Approximate size of user data packed per block. Note that the /// block size specified here corresponds to uncompressed data. The /// actual size of the unit read from disk may be smaller if /// compression is enabled. This parameter can be changed dynamically. pub fn set_block_size(&mut self, size: usize) { unsafe { ffi::rocksdb_block_based_options_set_block_size(self.inner, size); } } /// Block size for partitioned metadata. Currently applied to indexes when /// kTwoLevelIndexSearch is used and to filters when partition_filters is used. /// Note: Since in the current implementation the filters and index partitions /// are aligned, an index/filter block is created when either index or filter /// block size reaches the specified limit. /// /// Note: this limit is currently applied to only index blocks; a filter /// partition is cut right after an index block is cut. pub fn set_metadata_block_size(&mut self, size: usize) { unsafe { ffi::rocksdb_block_based_options_set_metadata_block_size(self.inner, size as u64); } } /// Note: currently this option requires kTwoLevelIndexSearch to be set as /// well. /// /// Use partitioned full filters for each SST file. This option is /// incompatible with block-based filters. pub fn set_partition_filters(&mut self, size: bool) { unsafe { ffi::rocksdb_block_based_options_set_partition_filters(self.inner, c_uchar::from(size)); } } /// Sets global cache for blocks (user data is stored in a set of blocks, and /// a block is the unit of reading from disk). /// /// If set, use the specified cache for blocks. /// By default, rocksdb will automatically create and use an 8MB internal cache. pub fn set_block_cache(&mut self, cache: &Cache) { unsafe { ffi::rocksdb_block_based_options_set_block_cache(self.inner, cache.0.inner.as_ptr()); } self.outlive.block_cache = Some(cache.clone()); } /// Disable block cache pub fn disable_cache(&mut self) { unsafe { ffi::rocksdb_block_based_options_set_no_block_cache(self.inner, c_uchar::from(true)); } } /// Sets a [Bloom filter](https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter) /// policy to reduce disk reads. /// /// # Examples /// /// ``` /// use rocksdb::BlockBasedOptions; /// /// let mut opts = BlockBasedOptions::default(); /// opts.set_bloom_filter(10.0, true); /// ``` pub fn set_bloom_filter(&mut self, bits_per_key: c_double, block_based: bool) { unsafe { let bloom = if block_based { ffi::rocksdb_filterpolicy_create_bloom(bits_per_key as _) } else { ffi::rocksdb_filterpolicy_create_bloom_full(bits_per_key as _) }; ffi::rocksdb_block_based_options_set_filter_policy(self.inner, bloom); } } /// Sets a [Ribbon filter](http://rocksdb.org/blog/2021/12/29/ribbon-filter.html) /// policy to reduce disk reads. /// /// Ribbon filters use less memory in exchange for slightly more CPU usage /// compared to an equivalent bloom filter. /// /// # Examples /// /// ``` /// use rocksdb::BlockBasedOptions; /// /// let mut opts = BlockBasedOptions::default(); /// opts.set_ribbon_filter(10.0); /// ``` pub fn set_ribbon_filter(&mut self, bloom_equivalent_bits_per_key: c_double) { unsafe { let ribbon = ffi::rocksdb_filterpolicy_create_ribbon(bloom_equivalent_bits_per_key); ffi::rocksdb_block_based_options_set_filter_policy(self.inner, ribbon); } } /// Sets a hybrid [Ribbon filter](http://rocksdb.org/blog/2021/12/29/ribbon-filter.html) /// policy to reduce disk reads. /// /// Uses Bloom filters before the given level, and Ribbon filters for all /// other levels. This combines the memory savings from Ribbon filters /// with the lower CPU usage of Bloom filters. /// /// # Examples /// /// ``` /// use rocksdb::BlockBasedOptions; /// /// let mut opts = BlockBasedOptions::default(); /// opts.set_hybrid_ribbon_filter(10.0, 2); /// ``` pub fn set_hybrid_ribbon_filter( &mut self, bloom_equivalent_bits_per_key: c_double, bloom_before_level: c_int, ) { unsafe { let ribbon = ffi::rocksdb_filterpolicy_create_ribbon_hybrid( bloom_equivalent_bits_per_key, bloom_before_level, ); ffi::rocksdb_block_based_options_set_filter_policy(self.inner, ribbon); } } /// If cache_index_and_filter_blocks is enabled, cache index and filter blocks with high priority. /// If set to true, depending on implementation of block cache, /// index and filter blocks may be less likely to be evicted than data blocks. pub fn set_cache_index_and_filter_blocks(&mut self, v: bool) { unsafe { ffi::rocksdb_block_based_options_set_cache_index_and_filter_blocks( self.inner, c_uchar::from(v), ); } } /// Defines the index type to be used for SS-table lookups. /// /// # Examples /// /// ``` /// use rocksdb::{BlockBasedOptions, BlockBasedIndexType, Options}; /// /// let mut opts = Options::default(); /// let mut block_opts = BlockBasedOptions::default(); /// block_opts.set_index_type(BlockBasedIndexType::HashSearch); /// ``` pub fn set_index_type(&mut self, index_type: BlockBasedIndexType) { let index = index_type as i32; unsafe { ffi::rocksdb_block_based_options_set_index_type(self.inner, index); } } /// If cache_index_and_filter_blocks is true and the below is true, then /// filter and index blocks are stored in the cache, but a reference is /// held in the "table reader" object so the blocks are pinned and only /// evicted from cache when the table reader is freed. /// /// Default: false. pub fn set_pin_l0_filter_and_index_blocks_in_cache(&mut self, v: bool) { unsafe { ffi::rocksdb_block_based_options_set_pin_l0_filter_and_index_blocks_in_cache( self.inner, c_uchar::from(v), ); } } /// If cache_index_and_filter_blocks is true and the below is true, then /// the top-level index of partitioned filter and index blocks are stored in /// the cache, but a reference is held in the "table reader" object so the /// blocks are pinned and only evicted from cache when the table reader is /// freed. This is not limited to l0 in LSM tree. /// /// Default: false. pub fn set_pin_top_level_index_and_filter(&mut self, v: bool) { unsafe { ffi::rocksdb_block_based_options_set_pin_top_level_index_and_filter( self.inner, c_uchar::from(v), ); } } /// Format version, reserved for backward compatibility. /// /// See full [list](https://github.com/facebook/rocksdb/blob/v8.6.7/include/rocksdb/table.h#L493-L521) /// of the supported versions. /// /// Default: 5. pub fn set_format_version(&mut self, version: i32) { unsafe { ffi::rocksdb_block_based_options_set_format_version(self.inner, version); } } /// Number of keys between restart points for delta encoding of keys. /// This parameter can be changed dynamically. Most clients should /// leave this parameter alone. The minimum value allowed is 1. Any smaller /// value will be silently overwritten with 1. /// /// Default: 16. pub fn set_block_restart_interval(&mut self, interval: i32) { unsafe { ffi::rocksdb_block_based_options_set_block_restart_interval(self.inner, interval); } } /// Same as block_restart_interval but used for the index block. /// If you don't plan to run RocksDB before version 5.16 and you are /// using `index_block_restart_interval` > 1, you should /// probably set the `format_version` to >= 4 as it would reduce the index size. /// /// Default: 1. pub fn set_index_block_restart_interval(&mut self, interval: i32) { unsafe { ffi::rocksdb_block_based_options_set_index_block_restart_interval(self.inner, interval); } } /// Set the data block index type for point lookups: /// `DataBlockIndexType::BinarySearch` to use binary search within the data block. /// `DataBlockIndexType::BinaryAndHash` to use the data block hash index in combination with /// the normal binary search. /// /// The hash table utilization ratio is adjustable using [`set_data_block_hash_ratio`](#method.set_data_block_hash_ratio), which is /// valid only when using `DataBlockIndexType::BinaryAndHash`. /// /// Default: `BinarySearch` /// # Examples /// /// ``` /// use rocksdb::{BlockBasedOptions, DataBlockIndexType, Options}; /// /// let mut opts = Options::default(); /// let mut block_opts = BlockBasedOptions::default(); /// block_opts.set_data_block_index_type(DataBlockIndexType::BinaryAndHash); /// block_opts.set_data_block_hash_ratio(0.85); /// ``` pub fn set_data_block_index_type(&mut self, index_type: DataBlockIndexType) { let index_t = index_type as i32; unsafe { ffi::rocksdb_block_based_options_set_data_block_index_type(self.inner, index_t); } } /// Set the data block hash index utilization ratio. /// /// The smaller the utilization ratio, the less hash collisions happen, and so reduce the risk for a /// point lookup to fall back to binary search due to the collisions. A small ratio means faster /// lookup at the price of more space overhead. /// /// Default: 0.75 pub fn set_data_block_hash_ratio(&mut self, ratio: f64) { unsafe { ffi::rocksdb_block_based_options_set_data_block_hash_ratio(self.inner, ratio); } } /// If false, place only prefixes in the filter, not whole keys. /// /// Defaults to true. pub fn set_whole_key_filtering(&mut self, v: bool) { unsafe { ffi::rocksdb_block_based_options_set_whole_key_filtering(self.inner, c_uchar::from(v)); } } /// Use the specified checksum type. /// Newly created table files will be protected with this checksum type. /// Old table files will still be readable, even though they have different checksum type. pub fn set_checksum_type(&mut self, checksum_type: ChecksumType) { unsafe { ffi::rocksdb_block_based_options_set_checksum(self.inner, checksum_type as c_char); } } /// If true, generate Bloom/Ribbon filters that minimize memory internal /// fragmentation. /// See official [wiki]( /// https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter#reducing-internal-fragmentation) /// for more information. /// /// Defaults to false. /// # Examples /// /// ``` /// use rocksdb::BlockBasedOptions; /// /// let mut opts = BlockBasedOptions::default(); /// opts.set_bloom_filter(10.0, true); /// opts.set_optimize_filters_for_memory(true); /// ``` pub fn set_optimize_filters_for_memory(&mut self, v: bool) { unsafe { ffi::rocksdb_block_based_options_set_optimize_filters_for_memory( self.inner, c_uchar::from(v), ); } } } impl Default for BlockBasedOptions { fn default() -> Self { let block_opts = unsafe { ffi::rocksdb_block_based_options_create() }; assert!( !block_opts.is_null(), "Could not create RocksDB block based options" ); Self { inner: block_opts, outlive: BlockBasedOptionsMustOutliveDB::default(), } } } impl CuckooTableOptions { /// Determines the utilization of hash tables. Smaller values /// result in larger hash tables with fewer collisions. /// Default: 0.9 pub fn set_hash_ratio(&mut self, ratio: f64) { unsafe { ffi::rocksdb_cuckoo_options_set_hash_ratio(self.inner, ratio); } } /// A property used by builder to determine the depth to go to /// to search for a path to displace elements in case of /// collision. See Builder.MakeSpaceForKey method. Higher /// values result in more efficient hash tables with fewer /// lookups but take more time to build. /// Default: 100 pub fn set_max_search_depth(&mut self, depth: u32) { unsafe { ffi::rocksdb_cuckoo_options_set_max_search_depth(self.inner, depth); } } /// In case of collision while inserting, the builder /// attempts to insert in the next cuckoo_block_size /// locations before skipping over to the next Cuckoo hash /// function. This makes lookups more cache friendly in case /// of collisions. /// Default: 5 pub fn set_cuckoo_block_size(&mut self, size: u32) { unsafe { ffi::rocksdb_cuckoo_options_set_cuckoo_block_size(self.inner, size); } } /// If this option is enabled, user key is treated as uint64_t and its value /// is used as hash value directly. This option changes builder's behavior. /// Reader ignore this option and behave according to what specified in /// table property. /// Default: false pub fn set_identity_as_first_hash(&mut self, flag: bool) { unsafe { ffi::rocksdb_cuckoo_options_set_identity_as_first_hash(self.inner, c_uchar::from(flag)); } } /// If this option is set to true, module is used during hash calculation. /// This often yields better space efficiency at the cost of performance. /// If this option is set to false, # of entries in table is constrained to /// be power of two, and bit and is used to calculate hash, which is faster in general. /// Default: true pub fn set_use_module_hash(&mut self, flag: bool) { unsafe { ffi::rocksdb_cuckoo_options_set_use_module_hash(self.inner, c_uchar::from(flag)); } } } impl Default for CuckooTableOptions { fn default() -> Self { let opts = unsafe { ffi::rocksdb_cuckoo_options_create() }; assert!(!opts.is_null(), "Could not create RocksDB cuckoo options"); Self { inner: opts } } } // Verbosity of the LOG. #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[repr(i32)] pub enum LogLevel { Debug = 0, Info, Warn, Error, Fatal, Header, } impl Options { /// Constructs the DBOptions and ColumnFamilyDescriptors by loading the /// latest RocksDB options file stored in the specified rocksdb database. /// /// *IMPORTANT*: /// ROCKSDB DOES NOT STORE cf ttl in the options file. If you have set it via /// [`ColumnFamilyDescriptor::new_with_ttl`] then you need to set it again after loading the options file. /// Tll will be set to [`ColumnFamilyTtl::Disabled`] for all column families for your safety. pub fn load_latest>( path: P, env: Env, ignore_unknown_options: bool, cache: Cache, ) -> Result<(Options, Vec), Error> { let path = to_cpath(path)?; let mut db_options: *mut ffi::rocksdb_options_t = null_mut(); let mut num_column_families: usize = 0; let mut column_family_names: *mut *mut c_char = null_mut(); let mut column_family_options: *mut *mut ffi::rocksdb_options_t = null_mut(); unsafe { ffi_try!(ffi::rocksdb_load_latest_options( path.as_ptr(), env.0.inner, ignore_unknown_options, cache.0.inner.as_ptr(), &mut db_options, &mut num_column_families, &mut column_family_names, &mut column_family_options, )); } let options = Options { inner: db_options, outlive: OptionsMustOutliveDB::default(), }; let column_families = unsafe { Options::read_column_descriptors( num_column_families, column_family_names, column_family_options, ) }; Ok((options, column_families)) } /// read column descriptors from c pointers #[inline] unsafe fn read_column_descriptors( num_column_families: usize, column_family_names: *mut *mut c_char, column_family_options: *mut *mut ffi::rocksdb_options_t, ) -> Vec { let column_family_names_iter = slice::from_raw_parts(column_family_names, num_column_families) .iter() .map(|ptr| from_cstr(*ptr)); let column_family_options_iter = slice::from_raw_parts(column_family_options, num_column_families) .iter() .map(|ptr| Options { inner: *ptr, outlive: OptionsMustOutliveDB::default(), }); let column_descriptors = column_family_names_iter .zip(column_family_options_iter) .map(|(name, options)| ColumnFamilyDescriptor { name, options, ttl: ColumnFamilyTtl::Disabled, }) .collect::>(); // free pointers slice::from_raw_parts(column_family_names, num_column_families) .iter() .for_each(|ptr| ffi::rocksdb_free(*ptr as *mut c_void)); ffi::rocksdb_free(column_family_names as *mut c_void); ffi::rocksdb_free(column_family_options as *mut c_void); column_descriptors } /// By default, RocksDB uses only one background thread for flush and /// compaction. Calling this function will set it up such that total of /// `total_threads` is used. Good value for `total_threads` is the number of /// cores. You almost definitely want to call this function if your system is /// bottlenecked by RocksDB. /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.increase_parallelism(3); /// ``` pub fn increase_parallelism(&mut self, parallelism: i32) { unsafe { ffi::rocksdb_options_increase_parallelism(self.inner, parallelism); } } /// Optimize level style compaction. /// /// Default values for some parameters in `Options` are not optimized for heavy /// workloads and big datasets, which means you might observe write stalls under /// some conditions. /// /// This can be used as one of the starting points for tuning RocksDB options in /// such cases. /// /// Internally, it sets `write_buffer_size`, `min_write_buffer_number_to_merge`, /// `max_write_buffer_number`, `level0_file_num_compaction_trigger`, /// `target_file_size_base`, `max_bytes_for_level_base`, so it can override if those /// parameters were set before. /// /// It sets buffer sizes so that memory consumption would be constrained by /// `memtable_memory_budget`. pub fn optimize_level_style_compaction(&mut self, memtable_memory_budget: usize) { unsafe { ffi::rocksdb_options_optimize_level_style_compaction( self.inner, memtable_memory_budget as u64, ); } } /// Optimize universal style compaction. /// /// Default values for some parameters in `Options` are not optimized for heavy /// workloads and big datasets, which means you might observe write stalls under /// some conditions. /// /// This can be used as one of the starting points for tuning RocksDB options in /// such cases. /// /// Internally, it sets `write_buffer_size`, `min_write_buffer_number_to_merge`, /// `max_write_buffer_number`, `level0_file_num_compaction_trigger`, /// `target_file_size_base`, `max_bytes_for_level_base`, so it can override if those /// parameters were set before. /// /// It sets buffer sizes so that memory consumption would be constrained by /// `memtable_memory_budget`. pub fn optimize_universal_style_compaction(&mut self, memtable_memory_budget: usize) { unsafe { ffi::rocksdb_options_optimize_universal_style_compaction( self.inner, memtable_memory_budget as u64, ); } } /// If true, the database will be created if it is missing. /// /// Default: `false` /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.create_if_missing(true); /// ``` pub fn create_if_missing(&mut self, create_if_missing: bool) { unsafe { ffi::rocksdb_options_set_create_if_missing( self.inner, c_uchar::from(create_if_missing), ); } } /// If true, any column families that didn't exist when opening the database /// will be created. /// /// Default: `false` /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.create_missing_column_families(true); /// ``` pub fn create_missing_column_families(&mut self, create_missing_cfs: bool) { unsafe { ffi::rocksdb_options_set_create_missing_column_families( self.inner, c_uchar::from(create_missing_cfs), ); } } /// Specifies whether an error should be raised if the database already exists. /// /// Default: false pub fn set_error_if_exists(&mut self, enabled: bool) { unsafe { ffi::rocksdb_options_set_error_if_exists(self.inner, c_uchar::from(enabled)); } } /// Enable/disable paranoid checks. /// /// If true, the implementation will do aggressive checking of the /// data it is processing and will stop early if it detects any /// errors. This may have unforeseen ramifications: for example, a /// corruption of one DB entry may cause a large number of entries to /// become unreadable or for the entire DB to become unopenable. /// If any of the writes to the database fails (Put, Delete, Merge, Write), /// the database will switch to read-only mode and fail all other /// Write operations. /// /// Default: false pub fn set_paranoid_checks(&mut self, enabled: bool) { unsafe { ffi::rocksdb_options_set_paranoid_checks(self.inner, c_uchar::from(enabled)); } } /// A list of paths where SST files can be put into, with its target size. /// Newer data is placed into paths specified earlier in the vector while /// older data gradually moves to paths specified later in the vector. /// /// For example, you have a flash device with 10GB allocated for the DB, /// as well as a hard drive of 2TB, you should config it to be: /// [{"/flash_path", 10GB}, {"/hard_drive", 2TB}] /// /// The system will try to guarantee data under each path is close to but /// not larger than the target size. But current and future file sizes used /// by determining where to place a file are based on best-effort estimation, /// which means there is a chance that the actual size under the directory /// is slightly more than target size under some workloads. User should give /// some buffer room for those cases. /// /// If none of the paths has sufficient room to place a file, the file will /// be placed to the last path anyway, despite to the target size. /// /// Placing newer data to earlier paths is also best-efforts. User should /// expect user files to be placed in higher levels in some extreme cases. /// /// If left empty, only one path will be used, which is `path` passed when /// opening the DB. /// /// Default: empty pub fn set_db_paths(&mut self, paths: &[DBPath]) { let mut paths: Vec<_> = paths.iter().map(|path| path.inner.cast_const()).collect(); let num_paths = paths.len(); unsafe { ffi::rocksdb_options_set_db_paths(self.inner, paths.as_mut_ptr(), num_paths); } } /// Use the specified object to interact with the environment, /// e.g. to read/write files, schedule background work, etc. In the near /// future, support for doing storage operations such as read/write files /// through env will be deprecated in favor of file_system. /// /// Default: Env::default() pub fn set_env(&mut self, env: &Env) { unsafe { ffi::rocksdb_options_set_env(self.inner, env.0.inner); } self.outlive.env = Some(env.clone()); } /// Sets the compression algorithm that will be used for compressing blocks. /// /// Default: `DBCompressionType::Snappy` (`DBCompressionType::None` if /// snappy feature is not enabled). /// /// # Examples /// /// ``` /// use rocksdb::{Options, DBCompressionType}; /// /// let mut opts = Options::default(); /// opts.set_compression_type(DBCompressionType::Snappy); /// ``` pub fn set_compression_type(&mut self, t: DBCompressionType) { unsafe { ffi::rocksdb_options_set_compression(self.inner, t as c_int); } } /// Number of threads for parallel compression. /// Parallel compression is enabled only if threads > 1. /// THE FEATURE IS STILL EXPERIMENTAL /// /// See [code](https://github.com/facebook/rocksdb/blob/v8.6.7/include/rocksdb/advanced_options.h#L116-L127) /// for more information. /// /// Default: 1 /// /// Examples /// /// ``` /// use rocksdb::{Options, DBCompressionType}; /// /// let mut opts = Options::default(); /// opts.set_compression_type(DBCompressionType::Zstd); /// opts.set_compression_options_parallel_threads(3); /// ``` pub fn set_compression_options_parallel_threads(&mut self, num: i32) { unsafe { ffi::rocksdb_options_set_compression_options_parallel_threads(self.inner, num); } } /// Sets the compression algorithm that will be used for compressing WAL. /// /// At present, only ZSTD compression is supported! /// /// Default: `DBCompressionType::None` /// /// # Examples /// /// ``` /// use rocksdb::{Options, DBCompressionType}; /// /// let mut opts = Options::default(); /// opts.set_wal_compression_type(DBCompressionType::Zstd); /// // Or None to disable it /// opts.set_wal_compression_type(DBCompressionType::None); /// ``` pub fn set_wal_compression_type(&mut self, t: DBCompressionType) { match t { DBCompressionType::None | DBCompressionType::Zstd => unsafe { ffi::rocksdb_options_set_wal_compression(self.inner, t as c_int); }, other => unimplemented!("{:?} is not supported for WAL compression", other), } } /// Sets the bottom-most compression algorithm that will be used for /// compressing blocks at the bottom-most level. /// /// Note that to actually enable bottom-most compression configuration after /// setting the compression type, it needs to be enabled by calling /// [`set_bottommost_compression_options`](#method.set_bottommost_compression_options) or /// [`set_bottommost_zstd_max_train_bytes`](#method.set_bottommost_zstd_max_train_bytes) method with `enabled` argument /// set to `true`. /// /// # Examples /// /// ``` /// use rocksdb::{Options, DBCompressionType}; /// /// let mut opts = Options::default(); /// opts.set_bottommost_compression_type(DBCompressionType::Zstd); /// opts.set_bottommost_zstd_max_train_bytes(0, true); /// ``` pub fn set_bottommost_compression_type(&mut self, t: DBCompressionType) { unsafe { ffi::rocksdb_options_set_bottommost_compression(self.inner, t as c_int); } } /// Different levels can have different compression policies. There /// are cases where most lower levels would like to use quick compression /// algorithms while the higher levels (which have more data) use /// compression algorithms that have better compression but could /// be slower. This array, if non-empty, should have an entry for /// each level of the database; these override the value specified in /// the previous field 'compression'. /// /// # Examples /// /// ``` /// use rocksdb::{Options, DBCompressionType}; /// /// let mut opts = Options::default(); /// opts.set_compression_per_level(&[ /// DBCompressionType::None, /// DBCompressionType::None, /// DBCompressionType::Snappy, /// DBCompressionType::Snappy, /// DBCompressionType::Snappy /// ]); /// ``` pub fn set_compression_per_level(&mut self, level_types: &[DBCompressionType]) { unsafe { let mut level_types: Vec<_> = level_types.iter().map(|&t| t as c_int).collect(); ffi::rocksdb_options_set_compression_per_level( self.inner, level_types.as_mut_ptr(), level_types.len() as size_t, ); } } /// Maximum size of dictionaries used to prime the compression library. /// Enabling dictionary can improve compression ratios when there are /// repetitions across data blocks. /// /// The dictionary is created by sampling the SST file data. If /// `zstd_max_train_bytes` is nonzero, the samples are passed through zstd's /// dictionary generator. Otherwise, the random samples are used directly as /// the dictionary. /// /// When compression dictionary is disabled, we compress and write each block /// before buffering data for the next one. When compression dictionary is /// enabled, we buffer all SST file data in-memory so we can sample it, as data /// can only be compressed and written after the dictionary has been finalized. /// So users of this feature may see increased memory usage. /// /// Default: `0` /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_compression_options(4, 5, 6, 7); /// ``` pub fn set_compression_options( &mut self, w_bits: c_int, level: c_int, strategy: c_int, max_dict_bytes: c_int, ) { unsafe { ffi::rocksdb_options_set_compression_options( self.inner, w_bits, level, strategy, max_dict_bytes, ); } } /// Sets compression options for blocks at the bottom-most level. Meaning /// of all settings is the same as in [`set_compression_options`](#method.set_compression_options) method but /// affect only the bottom-most compression which is set using /// [`set_bottommost_compression_type`](#method.set_bottommost_compression_type) method. /// /// # Examples /// /// ``` /// use rocksdb::{Options, DBCompressionType}; /// /// let mut opts = Options::default(); /// opts.set_bottommost_compression_type(DBCompressionType::Zstd); /// opts.set_bottommost_compression_options(4, 5, 6, 7, true); /// ``` pub fn set_bottommost_compression_options( &mut self, w_bits: c_int, level: c_int, strategy: c_int, max_dict_bytes: c_int, enabled: bool, ) { unsafe { ffi::rocksdb_options_set_bottommost_compression_options( self.inner, w_bits, level, strategy, max_dict_bytes, c_uchar::from(enabled), ); } } /// Sets maximum size of training data passed to zstd's dictionary trainer. Using zstd's /// dictionary trainer can achieve even better compression ratio improvements than using /// `max_dict_bytes` alone. /// /// The training data will be used to generate a dictionary of max_dict_bytes. /// /// Default: 0. pub fn set_zstd_max_train_bytes(&mut self, value: c_int) { unsafe { ffi::rocksdb_options_set_compression_options_zstd_max_train_bytes(self.inner, value); } } /// Sets maximum size of training data passed to zstd's dictionary trainer /// when compressing the bottom-most level. Using zstd's dictionary trainer /// can achieve even better compression ratio improvements than using /// `max_dict_bytes` alone. /// /// The training data will be used to generate a dictionary of /// `max_dict_bytes`. /// /// Default: 0. pub fn set_bottommost_zstd_max_train_bytes(&mut self, value: c_int, enabled: bool) { unsafe { ffi::rocksdb_options_set_bottommost_compression_options_zstd_max_train_bytes( self.inner, value, c_uchar::from(enabled), ); } } /// If non-zero, we perform bigger reads when doing compaction. If you're /// running RocksDB on spinning disks, you should set this to at least 2MB. /// That way RocksDB's compaction is doing sequential instead of random reads. /// /// Default: 2 * 1024 * 1024 (2 MB) pub fn set_compaction_readahead_size(&mut self, compaction_readahead_size: usize) { unsafe { ffi::rocksdb_options_compaction_readahead_size(self.inner, compaction_readahead_size); } } /// Allow RocksDB to pick dynamic base of bytes for levels. /// With this feature turned on, RocksDB will automatically adjust max bytes for each level. /// The goal of this feature is to have lower bound on size amplification. /// /// Default: false. pub fn set_level_compaction_dynamic_level_bytes(&mut self, v: bool) { unsafe { ffi::rocksdb_options_set_level_compaction_dynamic_level_bytes( self.inner, c_uchar::from(v), ); } } /// This option has different meanings for different compaction styles: /// /// Leveled: files older than `periodic_compaction_seconds` will be picked up /// for compaction and will be re-written to the same level as they were /// before. /// /// FIFO: not supported. Setting this option has no effect for FIFO compaction. /// /// Universal: when there are files older than `periodic_compaction_seconds`, /// rocksdb will try to do as large a compaction as possible including the /// last level. Such compaction is only skipped if only last level is to /// be compacted and no file in last level is older than /// `periodic_compaction_seconds`. See more in /// UniversalCompactionBuilder::PickPeriodicCompaction(). /// For backward compatibility, the effective value of this option takes /// into account the value of option `ttl`. The logic is as follows: /// - both options are set to 30 days if they have the default value. /// - if both options are zero, zero is picked. Otherwise, we take the min /// value among non-zero options values (i.e. takes the stricter limit). /// /// One main use of the feature is to make sure a file goes through compaction /// filters periodically. Users can also use the feature to clear up SST /// files using old format. /// /// A file's age is computed by looking at file_creation_time or creation_time /// table properties in order, if they have valid non-zero values; if not, the /// age is based on the file's last modified time (given by the underlying /// Env). /// /// This option only supports block based table format for any compaction /// style. /// /// unit: seconds. Ex: 7 days = 7 * 24 * 60 * 60 /// /// Values: /// 0: Turn off Periodic compactions. /// UINT64_MAX - 1 (0xfffffffffffffffe) is special flag to allow RocksDB to /// pick default. /// /// Default: 30 days if using block based table format + compaction filter + /// leveled compaction or block based table format + universal compaction. /// 0 (disabled) otherwise. /// pub fn set_periodic_compaction_seconds(&mut self, secs: u64) { unsafe { ffi::rocksdb_options_set_periodic_compaction_seconds(self.inner, secs); } } pub fn set_merge_operator_associative( &mut self, name: impl CStrLike, full_merge_fn: F, ) { let cb = Box::new(MergeOperatorCallback { name: name.into_c_string().unwrap(), full_merge_fn: full_merge_fn.clone(), partial_merge_fn: full_merge_fn, }); unsafe { let mo = ffi::rocksdb_mergeoperator_create( Box::into_raw(cb).cast::(), Some(merge_operator::destructor_callback::), Some(full_merge_callback::), Some(partial_merge_callback::), Some(merge_operator::delete_callback), Some(merge_operator::name_callback::), ); ffi::rocksdb_options_set_merge_operator(self.inner, mo); } } pub fn set_merge_operator( &mut self, name: impl CStrLike, full_merge_fn: F, partial_merge_fn: PF, ) { let cb = Box::new(MergeOperatorCallback { name: name.into_c_string().unwrap(), full_merge_fn, partial_merge_fn, }); unsafe { let mo = ffi::rocksdb_mergeoperator_create( Box::into_raw(cb).cast::(), Some(merge_operator::destructor_callback::), Some(full_merge_callback::), Some(partial_merge_callback::), Some(merge_operator::delete_callback), Some(merge_operator::name_callback::), ); ffi::rocksdb_options_set_merge_operator(self.inner, mo); } } #[deprecated( since = "0.5.0", note = "add_merge_operator has been renamed to set_merge_operator" )] pub fn add_merge_operator(&mut self, name: &str, merge_fn: F) { self.set_merge_operator_associative(name, merge_fn); } /// Sets a compaction filter used to determine if entries should be kept, changed, /// or removed during compaction. /// /// An example use case is to remove entries with an expired TTL. /// /// If you take a snapshot of the database, only values written since the last /// snapshot will be passed through the compaction filter. /// /// If multi-threaded compaction is used, `filter_fn` may be called multiple times /// simultaneously. pub fn set_compaction_filter(&mut self, name: impl CStrLike, filter_fn: F) where F: CompactionFilterFn + Send + 'static, { let cb = Box::new(CompactionFilterCallback { name: name.into_c_string().unwrap(), filter_fn, }); unsafe { let cf = ffi::rocksdb_compactionfilter_create( Box::into_raw(cb).cast::(), Some(compaction_filter::destructor_callback::>), Some(compaction_filter::filter_callback::>), Some(compaction_filter::name_callback::>), ); ffi::rocksdb_options_set_compaction_filter(self.inner, cf); } } /// This is a factory that provides compaction filter objects which allow /// an application to modify/delete a key-value during background compaction. /// /// A new filter will be created on each compaction run. If multithreaded /// compaction is being used, each created CompactionFilter will only be used /// from a single thread and so does not need to be thread-safe. /// /// Default: nullptr pub fn set_compaction_filter_factory(&mut self, factory: F) where F: CompactionFilterFactory + 'static, { let factory = Box::new(factory); unsafe { let cff = ffi::rocksdb_compactionfilterfactory_create( Box::into_raw(factory).cast::(), Some(compaction_filter_factory::destructor_callback::), Some(compaction_filter_factory::create_compaction_filter_callback::), Some(compaction_filter_factory::name_callback::), ); ffi::rocksdb_options_set_compaction_filter_factory(self.inner, cff); } } /// Sets the comparator used to define the order of keys in the table. /// Default: a comparator that uses lexicographic byte-wise ordering /// /// The client must ensure that the comparator supplied here has the same /// name and orders keys *exactly* the same as the comparator provided to /// previous open calls on the same DB. pub fn set_comparator(&mut self, name: impl CStrLike, compare_fn: Box) { let cb = Box::new(ComparatorCallback { name: name.into_c_string().unwrap(), compare_fn, }); unsafe { let cmp = ffi::rocksdb_comparator_create( Box::into_raw(cb).cast::(), Some(ComparatorCallback::destructor_callback), Some(ComparatorCallback::compare_callback), Some(ComparatorCallback::name_callback), ); ffi::rocksdb_options_set_comparator(self.inner, cmp); } } /// Sets the comparator that are timestamp-aware, used to define the order of keys in the table, /// taking timestamp into consideration. /// Find more information on timestamp-aware comparator on [here](https://github.com/facebook/rocksdb/wiki/User-defined-Timestamp) /// /// The client must ensure that the comparator supplied here has the same /// name and orders keys *exactly* the same as the comparator provided to /// previous open calls on the same DB. pub fn set_comparator_with_ts( &mut self, name: impl CStrLike, timestamp_size: usize, compare_fn: Box, compare_ts_fn: Box, compare_without_ts_fn: Box, ) { let cb = Box::new(ComparatorWithTsCallback { name: name.into_c_string().unwrap(), compare_fn, compare_ts_fn, compare_without_ts_fn, }); unsafe { let cmp = ffi::rocksdb_comparator_with_ts_create( Box::into_raw(cb).cast::(), Some(ComparatorWithTsCallback::destructor_callback), Some(ComparatorWithTsCallback::compare_callback), Some(ComparatorWithTsCallback::compare_ts_callback), Some(ComparatorWithTsCallback::compare_without_ts_callback), Some(ComparatorWithTsCallback::name_callback), timestamp_size, ); ffi::rocksdb_options_set_comparator(self.inner, cmp); } } pub fn set_prefix_extractor(&mut self, prefix_extractor: SliceTransform) { unsafe { ffi::rocksdb_options_set_prefix_extractor(self.inner, prefix_extractor.inner); } } // Use this if you don't need to keep the data sorted, i.e. you'll never use // an iterator, only Put() and Get() API calls // pub fn optimize_for_point_lookup(&mut self, block_cache_size_mb: u64) { unsafe { ffi::rocksdb_options_optimize_for_point_lookup(self.inner, block_cache_size_mb); } } /// Sets the optimize_filters_for_hits flag /// /// Default: `false` /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_optimize_filters_for_hits(true); /// ``` pub fn set_optimize_filters_for_hits(&mut self, optimize_for_hits: bool) { unsafe { ffi::rocksdb_options_set_optimize_filters_for_hits( self.inner, c_int::from(optimize_for_hits), ); } } /// Sets the periodicity when obsolete files get deleted. /// /// The files that get out of scope by compaction /// process will still get automatically delete on every compaction, /// regardless of this setting. /// /// Default: 6 hours pub fn set_delete_obsolete_files_period_micros(&mut self, micros: u64) { unsafe { ffi::rocksdb_options_set_delete_obsolete_files_period_micros(self.inner, micros); } } /// Prepare the DB for bulk loading. /// /// All data will be in level 0 without any automatic compaction. /// It's recommended to manually call CompactRange(NULL, NULL) before reading /// from the database, because otherwise the read can be very slow. pub fn prepare_for_bulk_load(&mut self) { unsafe { ffi::rocksdb_options_prepare_for_bulk_load(self.inner); } } /// Sets the number of open files that can be used by the DB. You may need to /// increase this if your database has a large working set. Value `-1` means /// files opened are always kept open. You can estimate number of files based /// on target_file_size_base and target_file_size_multiplier for level-based /// compaction. For universal-style compaction, you can usually set it to `-1`. /// /// Default: `-1` /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_max_open_files(10); /// ``` pub fn set_max_open_files(&mut self, nfiles: c_int) { unsafe { ffi::rocksdb_options_set_max_open_files(self.inner, nfiles); } } /// If max_open_files is -1, DB will open all files on DB::Open(). You can /// use this option to increase the number of threads used to open the files. /// Default: 16 pub fn set_max_file_opening_threads(&mut self, nthreads: c_int) { unsafe { ffi::rocksdb_options_set_max_file_opening_threads(self.inner, nthreads); } } /// By default, writes to stable storage use fdatasync (on platforms /// where this function is available). If this option is true, /// fsync is used instead. /// /// fsync and fdatasync are equally safe for our purposes and fdatasync is /// faster, so it is rarely necessary to set this option. It is provided /// as a workaround for kernel/filesystem bugs, such as one that affected /// fdatasync with ext4 in kernel versions prior to 3.7. /// /// Default: `false` /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_use_fsync(true); /// ``` pub fn set_use_fsync(&mut self, useit: bool) { unsafe { ffi::rocksdb_options_set_use_fsync(self.inner, c_int::from(useit)); } } /// Specifies the absolute info LOG dir. /// /// If it is empty, the log files will be in the same dir as data. /// If it is non empty, the log files will be in the specified dir, /// and the db data dir's absolute path will be used as the log file /// name's prefix. /// /// Default: empty pub fn set_db_log_dir>(&mut self, path: P) { let p = to_cpath(path).unwrap(); unsafe { ffi::rocksdb_options_set_db_log_dir(self.inner, p.as_ptr()); } } /// Specifies the log level. /// Consider the `LogLevel` enum for a list of possible levels. /// /// Default: Info /// /// # Examples /// /// ``` /// use rocksdb::{Options, LogLevel}; /// /// let mut opts = Options::default(); /// opts.set_log_level(LogLevel::Warn); /// ``` pub fn set_log_level(&mut self, level: LogLevel) { unsafe { ffi::rocksdb_options_set_info_log_level(self.inner, level as c_int); } } /// Allows OS to incrementally sync files to disk while they are being /// written, asynchronously, in the background. This operation can be used /// to smooth out write I/Os over time. Users shouldn't rely on it for /// persistency guarantee. /// Issue one request for every bytes_per_sync written. `0` turns it off. /// /// Default: `0` /// /// You may consider using rate_limiter to regulate write rate to device. /// When rate limiter is enabled, it automatically enables bytes_per_sync /// to 1MB. /// /// This option applies to table files /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_bytes_per_sync(1024 * 1024); /// ``` pub fn set_bytes_per_sync(&mut self, nbytes: u64) { unsafe { ffi::rocksdb_options_set_bytes_per_sync(self.inner, nbytes); } } /// Same as bytes_per_sync, but applies to WAL files. /// /// Default: 0, turned off /// /// Dynamically changeable through SetDBOptions() API. pub fn set_wal_bytes_per_sync(&mut self, nbytes: u64) { unsafe { ffi::rocksdb_options_set_wal_bytes_per_sync(self.inner, nbytes); } } /// Sets the maximum buffer size that is used by WritableFileWriter. /// /// On Windows, we need to maintain an aligned buffer for writes. /// We allow the buffer to grow until it's size hits the limit in buffered /// IO and fix the buffer size when using direct IO to ensure alignment of /// write requests if the logical sector size is unusual /// /// Default: 1024 * 1024 (1 MB) /// /// Dynamically changeable through SetDBOptions() API. pub fn set_writable_file_max_buffer_size(&mut self, nbytes: u64) { unsafe { ffi::rocksdb_options_set_writable_file_max_buffer_size(self.inner, nbytes); } } /// If true, allow multi-writers to update mem tables in parallel. /// Only some memtable_factory-s support concurrent writes; currently it /// is implemented only for SkipListFactory. Concurrent memtable writes /// are not compatible with inplace_update_support or filter_deletes. /// It is strongly recommended to set enable_write_thread_adaptive_yield /// if you are going to use this feature. /// /// Default: true /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_allow_concurrent_memtable_write(false); /// ``` pub fn set_allow_concurrent_memtable_write(&mut self, allow: bool) { unsafe { ffi::rocksdb_options_set_allow_concurrent_memtable_write( self.inner, c_uchar::from(allow), ); } } /// If true, threads synchronizing with the write batch group leader will wait for up to /// write_thread_max_yield_usec before blocking on a mutex. This can substantially improve /// throughput for concurrent workloads, regardless of whether allow_concurrent_memtable_write /// is enabled. /// /// Default: true pub fn set_enable_write_thread_adaptive_yield(&mut self, enabled: bool) { unsafe { ffi::rocksdb_options_set_enable_write_thread_adaptive_yield( self.inner, c_uchar::from(enabled), ); } } /// Specifies whether an iteration->Next() sequentially skips over keys with the same user-key or not. /// /// This number specifies the number of keys (with the same userkey) /// that will be sequentially skipped before a reseek is issued. /// /// Default: 8 pub fn set_max_sequential_skip_in_iterations(&mut self, num: u64) { unsafe { ffi::rocksdb_options_set_max_sequential_skip_in_iterations(self.inner, num); } } /// Enable direct I/O mode for reading /// they may or may not improve performance depending on the use case /// /// Files will be opened in "direct I/O" mode /// which means that data read from the disk will not be cached or /// buffered. The hardware buffer of the devices may however still /// be used. Memory mapped files are not impacted by these parameters. /// /// Default: false /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_use_direct_reads(true); /// ``` pub fn set_use_direct_reads(&mut self, enabled: bool) { unsafe { ffi::rocksdb_options_set_use_direct_reads(self.inner, c_uchar::from(enabled)); } } /// Enable direct I/O mode for flush and compaction /// /// Files will be opened in "direct I/O" mode /// which means that data written to the disk will not be cached or /// buffered. The hardware buffer of the devices may however still /// be used. Memory mapped files are not impacted by these parameters. /// they may or may not improve performance depending on the use case /// /// Default: false /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_use_direct_io_for_flush_and_compaction(true); /// ``` pub fn set_use_direct_io_for_flush_and_compaction(&mut self, enabled: bool) { unsafe { ffi::rocksdb_options_set_use_direct_io_for_flush_and_compaction( self.inner, c_uchar::from(enabled), ); } } /// Enable/disable child process inherit open files. /// /// Default: true pub fn set_is_fd_close_on_exec(&mut self, enabled: bool) { unsafe { ffi::rocksdb_options_set_is_fd_close_on_exec(self.inner, c_uchar::from(enabled)); } } /// Hints to the OS that it should not buffer disk I/O. Enabling this /// parameter may improve performance but increases pressure on the /// system cache. /// /// The exact behavior of this parameter is platform dependent. /// /// On POSIX systems, after RocksDB reads data from disk it will /// mark the pages as "unneeded". The operating system may - or may not /// - evict these pages from memory, reducing pressure on the system /// cache. If the disk block is requested again this can result in /// additional disk I/O. /// /// On WINDOWS systems, files will be opened in "unbuffered I/O" mode /// which means that data read from the disk will not be cached or /// bufferized. The hardware buffer of the devices may however still /// be used. Memory mapped files are not impacted by this parameter. /// /// Default: true /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// #[allow(deprecated)] /// opts.set_allow_os_buffer(false); /// ``` #[deprecated( since = "0.7.0", note = "replaced with set_use_direct_reads/set_use_direct_io_for_flush_and_compaction methods" )] pub fn set_allow_os_buffer(&mut self, is_allow: bool) { self.set_use_direct_reads(!is_allow); self.set_use_direct_io_for_flush_and_compaction(!is_allow); } /// Sets the number of shards used for table cache. /// /// Default: `6` /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_table_cache_num_shard_bits(4); /// ``` pub fn set_table_cache_num_shard_bits(&mut self, nbits: c_int) { unsafe { ffi::rocksdb_options_set_table_cache_numshardbits(self.inner, nbits); } } /// By default target_file_size_multiplier is 1, which means /// by default files in different levels will have similar size. /// /// Dynamically changeable through SetOptions() API pub fn set_target_file_size_multiplier(&mut self, multiplier: i32) { unsafe { ffi::rocksdb_options_set_target_file_size_multiplier(self.inner, multiplier as c_int); } } /// Sets the minimum number of write buffers that will be merged /// before writing to storage. If set to `1`, then /// all write buffers are flushed to L0 as individual files and this increases /// read amplification because a get request has to check in all of these /// files. Also, an in-memory merge may result in writing lesser /// data to storage if there are duplicate records in each of these /// individual write buffers. /// /// Default: `1` /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_min_write_buffer_number(2); /// ``` pub fn set_min_write_buffer_number(&mut self, nbuf: c_int) { unsafe { ffi::rocksdb_options_set_min_write_buffer_number_to_merge(self.inner, nbuf); } } /// Sets the maximum number of write buffers that are built up in memory. /// The default and the minimum number is 2, so that when 1 write buffer /// is being flushed to storage, new writes can continue to the other /// write buffer. /// If max_write_buffer_number > 3, writing will be slowed down to /// options.delayed_write_rate if we are writing to the last write buffer /// allowed. /// /// Default: `2` /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_max_write_buffer_number(4); /// ``` pub fn set_max_write_buffer_number(&mut self, nbuf: c_int) { unsafe { ffi::rocksdb_options_set_max_write_buffer_number(self.inner, nbuf); } } /// Sets the amount of data to build up in memory (backed by an unsorted log /// on disk) before converting to a sorted on-disk file. /// /// Larger values increase performance, especially during bulk loads. /// Up to max_write_buffer_number write buffers may be held in memory /// at the same time, /// so you may wish to adjust this parameter to control memory usage. /// Also, a larger write buffer will result in a longer recovery time /// the next time the database is opened. /// /// Note that write_buffer_size is enforced per column family. /// See db_write_buffer_size for sharing memory across column families. /// /// Default: `0x4000000` (64MiB) /// /// Dynamically changeable through SetOptions() API /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_write_buffer_size(128 * 1024 * 1024); /// ``` pub fn set_write_buffer_size(&mut self, size: usize) { unsafe { ffi::rocksdb_options_set_write_buffer_size(self.inner, size); } } /// Amount of data to build up in memtables across all column /// families before writing to disk. /// /// This is distinct from write_buffer_size, which enforces a limit /// for a single memtable. /// /// This feature is disabled by default. Specify a non-zero value /// to enable it. /// /// Default: 0 (disabled) /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_db_write_buffer_size(128 * 1024 * 1024); /// ``` pub fn set_db_write_buffer_size(&mut self, size: usize) { unsafe { ffi::rocksdb_options_set_db_write_buffer_size(self.inner, size); } } /// Control maximum total data size for a level. /// max_bytes_for_level_base is the max total for level-1. /// Maximum number of bytes for level L can be calculated as /// (max_bytes_for_level_base) * (max_bytes_for_level_multiplier ^ (L-1)) /// For example, if max_bytes_for_level_base is 200MB, and if /// max_bytes_for_level_multiplier is 10, total data size for level-1 /// will be 200MB, total file size for level-2 will be 2GB, /// and total file size for level-3 will be 20GB. /// /// Default: `0x10000000` (256MiB). /// /// Dynamically changeable through SetOptions() API /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_max_bytes_for_level_base(512 * 1024 * 1024); /// ``` pub fn set_max_bytes_for_level_base(&mut self, size: u64) { unsafe { ffi::rocksdb_options_set_max_bytes_for_level_base(self.inner, size); } } /// Default: `10` /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_max_bytes_for_level_multiplier(4.0); /// ``` pub fn set_max_bytes_for_level_multiplier(&mut self, mul: f64) { unsafe { ffi::rocksdb_options_set_max_bytes_for_level_multiplier(self.inner, mul); } } /// The manifest file is rolled over on reaching this limit. /// The older manifest file be deleted. /// The default value is MAX_INT so that roll-over does not take place. /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_max_manifest_file_size(20 * 1024 * 1024); /// ``` pub fn set_max_manifest_file_size(&mut self, size: usize) { unsafe { ffi::rocksdb_options_set_max_manifest_file_size(self.inner, size); } } /// Sets the target file size for compaction. /// target_file_size_base is per-file size for level-1. /// Target file size for level L can be calculated by /// target_file_size_base * (target_file_size_multiplier ^ (L-1)) /// For example, if target_file_size_base is 2MB and /// target_file_size_multiplier is 10, then each file on level-1 will /// be 2MB, and each file on level 2 will be 20MB, /// and each file on level-3 will be 200MB. /// /// Default: `0x4000000` (64MiB) /// /// Dynamically changeable through SetOptions() API /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_target_file_size_base(128 * 1024 * 1024); /// ``` pub fn set_target_file_size_base(&mut self, size: u64) { unsafe { ffi::rocksdb_options_set_target_file_size_base(self.inner, size); } } /// Sets the minimum number of write buffers that will be merged together /// before writing to storage. If set to `1`, then /// all write buffers are flushed to L0 as individual files and this increases /// read amplification because a get request has to check in all of these /// files. Also, an in-memory merge may result in writing lesser /// data to storage if there are duplicate records in each of these /// individual write buffers. /// /// Default: `1` /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_min_write_buffer_number_to_merge(2); /// ``` pub fn set_min_write_buffer_number_to_merge(&mut self, to_merge: c_int) { unsafe { ffi::rocksdb_options_set_min_write_buffer_number_to_merge(self.inner, to_merge); } } /// Sets the number of files to trigger level-0 compaction. A value < `0` means that /// level-0 compaction will not be triggered by number of files at all. /// /// Default: `4` /// /// Dynamically changeable through SetOptions() API /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_level_zero_file_num_compaction_trigger(8); /// ``` pub fn set_level_zero_file_num_compaction_trigger(&mut self, n: c_int) { unsafe { ffi::rocksdb_options_set_level0_file_num_compaction_trigger(self.inner, n); } } /// Sets the compaction priority. When multiple files are picked for compaction from a level, /// this option determines which files to pick first. /// /// Default: `CompactionPri::ByCompensatedSize` /// /// Dynamically changeable through SetOptions() API /// /// See [rocksdb post](https://github.com/facebook/rocksdb/blob/f20d12adc85ece3e75fb238872959c702c0e5535/docs/_posts/2016-01-29-compaction_pri.markdown) for more details. /// /// # Examples /// /// ``` /// use rocksdb::{Options, CompactionPri}; /// /// let mut opts = Options::default(); /// opts.set_compaction_pri(CompactionPri::MinOverlappingRatio); /// ``` pub fn set_compaction_pri(&mut self, pri: CompactionPri) { unsafe { ffi::rocksdb_options_set_compaction_pri(self.inner, pri as i32); } } /// Sets the soft limit on number of level-0 files. We start slowing down writes at this /// point. A value < `0` means that no writing slowdown will be triggered by /// number of files in level-0. /// /// Default: `20` /// /// Dynamically changeable through SetOptions() API /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_level_zero_slowdown_writes_trigger(10); /// ``` pub fn set_level_zero_slowdown_writes_trigger(&mut self, n: c_int) { unsafe { ffi::rocksdb_options_set_level0_slowdown_writes_trigger(self.inner, n); } } /// Sets the maximum number of level-0 files. We stop writes at this point. /// /// Default: `24` /// /// Dynamically changeable through SetOptions() API /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_level_zero_stop_writes_trigger(48); /// ``` pub fn set_level_zero_stop_writes_trigger(&mut self, n: c_int) { unsafe { ffi::rocksdb_options_set_level0_stop_writes_trigger(self.inner, n); } } /// Sets the compaction style. /// /// Default: DBCompactionStyle::Level /// /// # Examples /// /// ``` /// use rocksdb::{Options, DBCompactionStyle}; /// /// let mut opts = Options::default(); /// opts.set_compaction_style(DBCompactionStyle::Universal); /// ``` pub fn set_compaction_style(&mut self, style: DBCompactionStyle) { unsafe { ffi::rocksdb_options_set_compaction_style(self.inner, style as c_int); } } /// Sets the options needed to support Universal Style compactions. pub fn set_universal_compaction_options(&mut self, uco: &UniversalCompactOptions) { unsafe { ffi::rocksdb_options_set_universal_compaction_options(self.inner, uco.inner); } } /// Sets the options for FIFO compaction style. pub fn set_fifo_compaction_options(&mut self, fco: &FifoCompactOptions) { unsafe { ffi::rocksdb_options_set_fifo_compaction_options(self.inner, fco.inner); } } /// Sets unordered_write to true trades higher write throughput with /// relaxing the immutability guarantee of snapshots. This violates the /// repeatability one expects from ::Get from a snapshot, as well as /// ::MultiGet and Iterator's consistent-point-in-time view property. /// If the application cannot tolerate the relaxed guarantees, it can implement /// its own mechanisms to work around that and yet benefit from the higher /// throughput. Using TransactionDB with WRITE_PREPARED write policy and /// two_write_queues=true is one way to achieve immutable snapshots despite /// unordered_write. /// /// By default, i.e., when it is false, rocksdb does not advance the sequence /// number for new snapshots unless all the writes with lower sequence numbers /// are already finished. This provides the immutability that we expect from /// snapshots. Moreover, since Iterator and MultiGet internally depend on /// snapshots, the snapshot immutability results into Iterator and MultiGet /// offering consistent-point-in-time view. If set to true, although /// Read-Your-Own-Write property is still provided, the snapshot immutability /// property is relaxed: the writes issued after the snapshot is obtained (with /// larger sequence numbers) will be still not visible to the reads from that /// snapshot, however, there still might be pending writes (with lower sequence /// number) that will change the state visible to the snapshot after they are /// landed to the memtable. /// /// Default: false pub fn set_unordered_write(&mut self, unordered: bool) { unsafe { ffi::rocksdb_options_set_unordered_write(self.inner, c_uchar::from(unordered)); } } /// Sets maximum number of threads that will /// concurrently perform a compaction job by breaking it into multiple, /// smaller ones that are run simultaneously. /// /// Default: 1 (i.e. no subcompactions) pub fn set_max_subcompactions(&mut self, num: u32) { unsafe { ffi::rocksdb_options_set_max_subcompactions(self.inner, num); } } /// Sets maximum number of concurrent background jobs /// (compactions and flushes). /// /// Default: 2 /// /// Dynamically changeable through SetDBOptions() API. pub fn set_max_background_jobs(&mut self, jobs: c_int) { unsafe { ffi::rocksdb_options_set_max_background_jobs(self.inner, jobs); } } /// Sets the maximum number of concurrent background compaction jobs, submitted to /// the default LOW priority thread pool. /// We first try to schedule compactions based on /// `base_background_compactions`. If the compaction cannot catch up , we /// will increase number of compaction threads up to /// `max_background_compactions`. /// /// If you're increasing this, also consider increasing number of threads in /// LOW priority thread pool. For more information, see /// Env::SetBackgroundThreads /// /// Default: `1` /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// #[allow(deprecated)] /// opts.set_max_background_compactions(2); /// ``` #[deprecated( since = "0.15.0", note = "RocksDB automatically decides this based on the value of max_background_jobs" )] pub fn set_max_background_compactions(&mut self, n: c_int) { unsafe { ffi::rocksdb_options_set_max_background_compactions(self.inner, n); } } /// Sets the maximum number of concurrent background memtable flush jobs, submitted to /// the HIGH priority thread pool. /// /// By default, all background jobs (major compaction and memtable flush) go /// to the LOW priority pool. If this option is set to a positive number, /// memtable flush jobs will be submitted to the HIGH priority pool. /// It is important when the same Env is shared by multiple db instances. /// Without a separate pool, long running major compaction jobs could /// potentially block memtable flush jobs of other db instances, leading to /// unnecessary Put stalls. /// /// If you're increasing this, also consider increasing number of threads in /// HIGH priority thread pool. For more information, see /// Env::SetBackgroundThreads /// /// Default: `1` /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// #[allow(deprecated)] /// opts.set_max_background_flushes(2); /// ``` #[deprecated( since = "0.15.0", note = "RocksDB automatically decides this based on the value of max_background_jobs" )] pub fn set_max_background_flushes(&mut self, n: c_int) { unsafe { ffi::rocksdb_options_set_max_background_flushes(self.inner, n); } } /// Disables automatic compactions. Manual compactions can still /// be issued on this column family /// /// Default: `false` /// /// Dynamically changeable through SetOptions() API /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_disable_auto_compactions(true); /// ``` pub fn set_disable_auto_compactions(&mut self, disable: bool) { unsafe { ffi::rocksdb_options_set_disable_auto_compactions(self.inner, c_int::from(disable)); } } /// SetMemtableHugePageSize sets the page size for huge page for /// arena used by the memtable. /// If <=0, it won't allocate from huge page but from malloc. /// Users are responsible to reserve huge pages for it to be allocated. For /// example: /// sysctl -w vm.nr_hugepages=20 /// See linux doc Documentation/vm/hugetlbpage.txt /// If there isn't enough free huge page available, it will fall back to /// malloc. /// /// Dynamically changeable through SetOptions() API pub fn set_memtable_huge_page_size(&mut self, size: size_t) { unsafe { ffi::rocksdb_options_set_memtable_huge_page_size(self.inner, size); } } /// Sets the maximum number of successive merge operations on a key in the memtable. /// /// When a merge operation is added to the memtable and the maximum number of /// successive merges is reached, the value of the key will be calculated and /// inserted into the memtable instead of the merge operation. This will /// ensure that there are never more than max_successive_merges merge /// operations in the memtable. /// /// Default: 0 (disabled) pub fn set_max_successive_merges(&mut self, num: usize) { unsafe { ffi::rocksdb_options_set_max_successive_merges(self.inner, num); } } /// Control locality of bloom filter probes to improve cache miss rate. /// This option only applies to memtable prefix bloom and plaintable /// prefix bloom. It essentially limits the max number of cache lines each /// bloom filter check can touch. /// /// This optimization is turned off when set to 0. The number should never /// be greater than number of probes. This option can boost performance /// for in-memory workload but should use with care since it can cause /// higher false positive rate. /// /// Default: 0 pub fn set_bloom_locality(&mut self, v: u32) { unsafe { ffi::rocksdb_options_set_bloom_locality(self.inner, v); } } /// Enable/disable thread-safe inplace updates. /// /// Requires updates if /// * key exists in current memtable /// * new sizeof(new_value) <= sizeof(old_value) /// * old_value for that key is a put i.e. kTypeValue /// /// Default: false. pub fn set_inplace_update_support(&mut self, enabled: bool) { unsafe { ffi::rocksdb_options_set_inplace_update_support(self.inner, c_uchar::from(enabled)); } } /// Sets the number of locks used for inplace update. /// /// Default: 10000 when inplace_update_support = true, otherwise 0. pub fn set_inplace_update_locks(&mut self, num: usize) { unsafe { ffi::rocksdb_options_set_inplace_update_num_locks(self.inner, num); } } /// Different max-size multipliers for different levels. /// These are multiplied by max_bytes_for_level_multiplier to arrive /// at the max-size of each level. /// /// Default: 1 /// /// Dynamically changeable through SetOptions() API pub fn set_max_bytes_for_level_multiplier_additional(&mut self, level_values: &[i32]) { let count = level_values.len(); unsafe { ffi::rocksdb_options_set_max_bytes_for_level_multiplier_additional( self.inner, level_values.as_ptr().cast_mut(), count, ); } } /// If true, then DB::Open() will not fetch and check sizes of all sst files. /// This may significantly speed up startup if there are many sst files, /// especially when using non-default Env with expensive GetFileSize(). /// We'll still check that all required sst files exist. /// If paranoid_checks is false, this option is ignored, and sst files are /// not checked at all. /// /// Default: false pub fn set_skip_checking_sst_file_sizes_on_db_open(&mut self, value: bool) { unsafe { ffi::rocksdb_options_set_skip_checking_sst_file_sizes_on_db_open( self.inner, c_uchar::from(value), ); } } /// The total maximum size(bytes) of write buffers to maintain in memory /// including copies of buffers that have already been flushed. This parameter /// only affects trimming of flushed buffers and does not affect flushing. /// This controls the maximum amount of write history that will be available /// in memory for conflict checking when Transactions are used. The actual /// size of write history (flushed Memtables) might be higher than this limit /// if further trimming will reduce write history total size below this /// limit. For example, if max_write_buffer_size_to_maintain is set to 64MB, /// and there are three flushed Memtables, with sizes of 32MB, 20MB, 20MB. /// Because trimming the next Memtable of size 20MB will reduce total memory /// usage to 52MB which is below the limit, RocksDB will stop trimming. /// /// When using an OptimisticTransactionDB: /// If this value is too low, some transactions may fail at commit time due /// to not being able to determine whether there were any write conflicts. /// /// When using a TransactionDB: /// If Transaction::SetSnapshot is used, TransactionDB will read either /// in-memory write buffers or SST files to do write-conflict checking. /// Increasing this value can reduce the number of reads to SST files /// done for conflict detection. /// /// Setting this value to 0 will cause write buffers to be freed immediately /// after they are flushed. If this value is set to -1, /// 'max_write_buffer_number * write_buffer_size' will be used. /// /// Default: /// If using a TransactionDB/OptimisticTransactionDB, the default value will /// be set to the value of 'max_write_buffer_number * write_buffer_size' /// if it is not explicitly set by the user. Otherwise, the default is 0. pub fn set_max_write_buffer_size_to_maintain(&mut self, size: i64) { unsafe { ffi::rocksdb_options_set_max_write_buffer_size_to_maintain(self.inner, size); } } /// By default, a single write thread queue is maintained. The thread gets /// to the head of the queue becomes write batch group leader and responsible /// for writing to WAL and memtable for the batch group. /// /// If enable_pipelined_write is true, separate write thread queue is /// maintained for WAL write and memtable write. A write thread first enter WAL /// writer queue and then memtable writer queue. Pending thread on the WAL /// writer queue thus only have to wait for previous writers to finish their /// WAL writing but not the memtable writing. Enabling the feature may improve /// write throughput and reduce latency of the prepare phase of two-phase /// commit. /// /// Default: false pub fn set_enable_pipelined_write(&mut self, value: bool) { unsafe { ffi::rocksdb_options_set_enable_pipelined_write(self.inner, c_uchar::from(value)); } } /// Defines the underlying memtable implementation. /// See official [wiki](https://github.com/facebook/rocksdb/wiki/MemTable) for more information. /// Defaults to using a skiplist. /// /// # Examples /// /// ``` /// use rocksdb::{Options, MemtableFactory}; /// let mut opts = Options::default(); /// let factory = MemtableFactory::HashSkipList { /// bucket_count: 1_000_000, /// height: 4, /// branching_factor: 4, /// }; /// /// opts.set_allow_concurrent_memtable_write(false); /// opts.set_memtable_factory(factory); /// ``` pub fn set_memtable_factory(&mut self, factory: MemtableFactory) { match factory { MemtableFactory::Vector => unsafe { ffi::rocksdb_options_set_memtable_vector_rep(self.inner); }, MemtableFactory::HashSkipList { bucket_count, height, branching_factor, } => unsafe { ffi::rocksdb_options_set_hash_skip_list_rep( self.inner, bucket_count, height, branching_factor, ); }, MemtableFactory::HashLinkList { bucket_count } => unsafe { ffi::rocksdb_options_set_hash_link_list_rep(self.inner, bucket_count); }, }; } pub fn set_block_based_table_factory(&mut self, factory: &BlockBasedOptions) { unsafe { ffi::rocksdb_options_set_block_based_table_factory(self.inner, factory.inner); } self.outlive.block_based = Some(factory.outlive.clone()); } /// Sets the table factory to a CuckooTableFactory (the default table /// factory is a block-based table factory that provides a default /// implementation of TableBuilder and TableReader with default /// BlockBasedTableOptions). /// See official [wiki](https://github.com/facebook/rocksdb/wiki/CuckooTable-Format) for more information on this table format. /// # Examples /// /// ``` /// use rocksdb::{Options, CuckooTableOptions}; /// /// let mut opts = Options::default(); /// let mut factory_opts = CuckooTableOptions::default(); /// factory_opts.set_hash_ratio(0.8); /// factory_opts.set_max_search_depth(20); /// factory_opts.set_cuckoo_block_size(10); /// factory_opts.set_identity_as_first_hash(true); /// factory_opts.set_use_module_hash(false); /// /// opts.set_cuckoo_table_factory(&factory_opts); /// ``` pub fn set_cuckoo_table_factory(&mut self, factory: &CuckooTableOptions) { unsafe { ffi::rocksdb_options_set_cuckoo_table_factory(self.inner, factory.inner); } } // This is a factory that provides TableFactory objects. // Default: a block-based table factory that provides a default // implementation of TableBuilder and TableReader with default // BlockBasedTableOptions. /// Sets the factory as plain table. /// See official [wiki](https://github.com/facebook/rocksdb/wiki/PlainTable-Format) for more /// information. /// /// # Examples /// /// ``` /// use rocksdb::{KeyEncodingType, Options, PlainTableFactoryOptions}; /// /// let mut opts = Options::default(); /// let factory_opts = PlainTableFactoryOptions { /// user_key_length: 0, /// bloom_bits_per_key: 20, /// hash_table_ratio: 0.75, /// index_sparseness: 16, /// huge_page_tlb_size: 0, /// encoding_type: KeyEncodingType::Plain, /// full_scan_mode: false, /// store_index_in_file: false, /// }; /// /// opts.set_plain_table_factory(&factory_opts); /// ``` pub fn set_plain_table_factory(&mut self, options: &PlainTableFactoryOptions) { unsafe { ffi::rocksdb_options_set_plain_table_factory( self.inner, options.user_key_length, options.bloom_bits_per_key, options.hash_table_ratio, options.index_sparseness, options.huge_page_tlb_size, options.encoding_type as c_char, c_uchar::from(options.full_scan_mode), c_uchar::from(options.store_index_in_file), ); } } /// Sets the start level to use compression. pub fn set_min_level_to_compress(&mut self, lvl: c_int) { unsafe { ffi::rocksdb_options_set_min_level_to_compress(self.inner, lvl); } } /// Measure IO stats in compactions and flushes, if `true`. /// /// Default: `false` /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_report_bg_io_stats(true); /// ``` pub fn set_report_bg_io_stats(&mut self, enable: bool) { unsafe { ffi::rocksdb_options_set_report_bg_io_stats(self.inner, c_int::from(enable)); } } /// Once write-ahead logs exceed this size, we will start forcing the flush of /// column families whose memtables are backed by the oldest live WAL file /// (i.e. the ones that are causing all the space amplification). /// /// Default: `0` /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// // Set max total wal size to 1G. /// opts.set_max_total_wal_size(1 << 30); /// ``` pub fn set_max_total_wal_size(&mut self, size: u64) { unsafe { ffi::rocksdb_options_set_max_total_wal_size(self.inner, size); } } /// Recovery mode to control the consistency while replaying WAL. /// /// Default: DBRecoveryMode::PointInTime /// /// # Examples /// /// ``` /// use rocksdb::{Options, DBRecoveryMode}; /// /// let mut opts = Options::default(); /// opts.set_wal_recovery_mode(DBRecoveryMode::AbsoluteConsistency); /// ``` pub fn set_wal_recovery_mode(&mut self, mode: DBRecoveryMode) { unsafe { ffi::rocksdb_options_set_wal_recovery_mode(self.inner, mode as c_int); } } pub fn enable_statistics(&mut self) { unsafe { ffi::rocksdb_options_enable_statistics(self.inner); } } pub fn get_statistics(&self) -> Option { unsafe { let value = ffi::rocksdb_options_statistics_get_string(self.inner); if value.is_null() { return None; } // Must have valid UTF-8 format. let s = CStr::from_ptr(value).to_str().unwrap().to_owned(); ffi::rocksdb_free(value as *mut c_void); Some(s) } } /// StatsLevel can be used to reduce statistics overhead by skipping certain /// types of stats in the stats collection process. pub fn set_statistics_level(&self, level: StatsLevel) { unsafe { ffi::rocksdb_options_set_statistics_level(self.inner, level as c_int) } } /// Returns the value of cumulative db counters if stat collection is enabled. pub fn get_ticker_count(&self, ticker: Ticker) -> u64 { unsafe { ffi::rocksdb_options_statistics_get_ticker_count(self.inner, ticker as u32) } } /// Gets Histogram data from collected db stats. Requires stats to be enabled. pub fn get_histogram_data(&self, histogram: Histogram) -> HistogramData { unsafe { let data = HistogramData::default(); ffi::rocksdb_options_statistics_get_histogram_data( self.inner, histogram as u32, data.inner, ); data } } /// If not zero, dump `rocksdb.stats` to LOG every `stats_dump_period_sec`. /// /// Default: `600` (10 mins) /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_stats_dump_period_sec(300); /// ``` pub fn set_stats_dump_period_sec(&mut self, period: c_uint) { unsafe { ffi::rocksdb_options_set_stats_dump_period_sec(self.inner, period); } } /// If not zero, dump rocksdb.stats to RocksDB to LOG every `stats_persist_period_sec`. /// /// Default: `600` (10 mins) /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_stats_persist_period_sec(5); /// ``` pub fn set_stats_persist_period_sec(&mut self, period: c_uint) { unsafe { ffi::rocksdb_options_set_stats_persist_period_sec(self.inner, period); } } /// When set to true, reading SST files will opt out of the filesystem's /// readahead. Setting this to false may improve sequential iteration /// performance. /// /// Default: `true` pub fn set_advise_random_on_open(&mut self, advise: bool) { unsafe { ffi::rocksdb_options_set_advise_random_on_open(self.inner, c_uchar::from(advise)); } } /// Enable/disable adaptive mutex, which spins in the user space before resorting to kernel. /// /// This could reduce context switch when the mutex is not /// heavily contended. However, if the mutex is hot, we could end up /// wasting spin time. /// /// Default: false pub fn set_use_adaptive_mutex(&mut self, enabled: bool) { unsafe { ffi::rocksdb_options_set_use_adaptive_mutex(self.inner, c_uchar::from(enabled)); } } /// Sets the number of levels for this database. pub fn set_num_levels(&mut self, n: c_int) { unsafe { ffi::rocksdb_options_set_num_levels(self.inner, n); } } /// When a `prefix_extractor` is defined through `opts.set_prefix_extractor` this /// creates a prefix bloom filter for each memtable with the size of /// `write_buffer_size * memtable_prefix_bloom_ratio` (capped at 0.25). /// /// Default: `0` /// /// # Examples /// /// ``` /// use rocksdb::{Options, SliceTransform}; /// /// let mut opts = Options::default(); /// let transform = SliceTransform::create_fixed_prefix(10); /// opts.set_prefix_extractor(transform); /// opts.set_memtable_prefix_bloom_ratio(0.2); /// ``` pub fn set_memtable_prefix_bloom_ratio(&mut self, ratio: f64) { unsafe { ffi::rocksdb_options_set_memtable_prefix_bloom_size_ratio(self.inner, ratio); } } /// Sets the maximum number of bytes in all compacted files. /// We try to limit number of bytes in one compaction to be lower than this /// threshold. But it's not guaranteed. /// /// Value 0 will be sanitized. /// /// Default: target_file_size_base * 25 pub fn set_max_compaction_bytes(&mut self, nbytes: u64) { unsafe { ffi::rocksdb_options_set_max_compaction_bytes(self.inner, nbytes); } } /// Specifies the absolute path of the directory the /// write-ahead log (WAL) should be written to. /// /// Default: same directory as the database /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut opts = Options::default(); /// opts.set_wal_dir("/path/to/dir"); /// ``` pub fn set_wal_dir>(&mut self, path: P) { let p = to_cpath(path).unwrap(); unsafe { ffi::rocksdb_options_set_wal_dir(self.inner, p.as_ptr()); } } /// Sets the WAL ttl in seconds. /// /// The following two options affect how archived logs will be deleted. /// 1. If both set to 0, logs will be deleted asap and will not get into /// the archive. /// 2. If wal_ttl_seconds is 0 and wal_size_limit_mb is not 0, /// WAL files will be checked every 10 min and if total size is greater /// then wal_size_limit_mb, they will be deleted starting with the /// earliest until size_limit is met. All empty files will be deleted. /// 3. If wal_ttl_seconds is not 0 and wall_size_limit_mb is 0, then /// WAL files will be checked every wal_ttl_seconds / 2 and those that /// are older than wal_ttl_seconds will be deleted. /// 4. If both are not 0, WAL files will be checked every 10 min and both /// checks will be performed with ttl being first. /// /// Default: 0 pub fn set_wal_ttl_seconds(&mut self, secs: u64) { unsafe { ffi::rocksdb_options_set_WAL_ttl_seconds(self.inner, secs); } } /// Sets the WAL size limit in MB. /// /// If total size of WAL files is greater then wal_size_limit_mb, /// they will be deleted starting with the earliest until size_limit is met. /// /// Default: 0 pub fn set_wal_size_limit_mb(&mut self, size: u64) { unsafe { ffi::rocksdb_options_set_WAL_size_limit_MB(self.inner, size); } } /// Sets the number of bytes to preallocate (via fallocate) the manifest files. /// /// Default is 4MB, which is reasonable to reduce random IO /// as well as prevent overallocation for mounts that preallocate /// large amounts of data (such as xfs's allocsize option). pub fn set_manifest_preallocation_size(&mut self, size: usize) { unsafe { ffi::rocksdb_options_set_manifest_preallocation_size(self.inner, size); } } /// If true, then DB::Open() will not update the statistics used to optimize /// compaction decision by loading table properties from many files. /// Turning off this feature will improve DBOpen time especially in disk environment. /// /// Default: false pub fn set_skip_stats_update_on_db_open(&mut self, skip: bool) { unsafe { ffi::rocksdb_options_set_skip_stats_update_on_db_open(self.inner, c_uchar::from(skip)); } } /// Specify the maximal number of info log files to be kept. /// /// Default: 1000 /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut options = Options::default(); /// options.set_keep_log_file_num(100); /// ``` pub fn set_keep_log_file_num(&mut self, nfiles: usize) { unsafe { ffi::rocksdb_options_set_keep_log_file_num(self.inner, nfiles); } } /// Allow the OS to mmap file for writing. /// /// Default: false /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut options = Options::default(); /// options.set_allow_mmap_writes(true); /// ``` pub fn set_allow_mmap_writes(&mut self, is_enabled: bool) { unsafe { ffi::rocksdb_options_set_allow_mmap_writes(self.inner, c_uchar::from(is_enabled)); } } /// Allow the OS to mmap file for reading sst tables. /// /// Default: false /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut options = Options::default(); /// options.set_allow_mmap_reads(true); /// ``` pub fn set_allow_mmap_reads(&mut self, is_enabled: bool) { unsafe { ffi::rocksdb_options_set_allow_mmap_reads(self.inner, c_uchar::from(is_enabled)); } } /// If enabled, WAL is not flushed automatically after each write. Instead it /// relies on manual invocation of `DB::flush_wal()` to write the WAL buffer /// to its file. /// /// Default: false /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut options = Options::default(); /// options.set_manual_wal_flush(true); /// ``` pub fn set_manual_wal_flush(&mut self, is_enabled: bool) { unsafe { ffi::rocksdb_options_set_manual_wal_flush(self.inner, c_uchar::from(is_enabled)); } } /// Guarantee that all column families are flushed together atomically. /// This option applies to both manual flushes (`db.flush()`) and automatic /// background flushes caused when memtables are filled. /// /// Note that this is only useful when the WAL is disabled. When using the /// WAL, writes are always consistent across column families. /// /// Default: false /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut options = Options::default(); /// options.set_atomic_flush(true); /// ``` pub fn set_atomic_flush(&mut self, atomic_flush: bool) { unsafe { ffi::rocksdb_options_set_atomic_flush(self.inner, c_uchar::from(atomic_flush)); } } /// Sets global cache for table-level rows. /// /// Default: null (disabled) /// Not supported in ROCKSDB_LITE mode! pub fn set_row_cache(&mut self, cache: &Cache) { unsafe { ffi::rocksdb_options_set_row_cache(self.inner, cache.0.inner.as_ptr()); } self.outlive.row_cache = Some(cache.clone()); } /// Use to control write rate of flush and compaction. Flush has higher /// priority than compaction. /// If rate limiter is enabled, bytes_per_sync is set to 1MB by default. /// /// Default: disable /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut options = Options::default(); /// options.set_ratelimiter(1024 * 1024, 100 * 1000, 10); /// ``` pub fn set_ratelimiter( &mut self, rate_bytes_per_sec: i64, refill_period_us: i64, fairness: i32, ) { unsafe { let ratelimiter = ffi::rocksdb_ratelimiter_create(rate_bytes_per_sec, refill_period_us, fairness); ffi::rocksdb_options_set_ratelimiter(self.inner, ratelimiter); ffi::rocksdb_ratelimiter_destroy(ratelimiter); } } /// Use to control write rate of flush and compaction. Flush has higher /// priority than compaction. /// If rate limiter is enabled, bytes_per_sync is set to 1MB by default. /// /// Default: disable pub fn set_auto_tuned_ratelimiter( &mut self, rate_bytes_per_sec: i64, refill_period_us: i64, fairness: i32, ) { unsafe { let ratelimiter = ffi::rocksdb_ratelimiter_create_auto_tuned( rate_bytes_per_sec, refill_period_us, fairness, ); ffi::rocksdb_options_set_ratelimiter(self.inner, ratelimiter); ffi::rocksdb_ratelimiter_destroy(ratelimiter); } } /// Sets the maximal size of the info log file. /// /// If the log file is larger than `max_log_file_size`, a new info log file /// will be created. If `max_log_file_size` is equal to zero, all logs will /// be written to one log file. /// /// Default: 0 /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut options = Options::default(); /// options.set_max_log_file_size(0); /// ``` pub fn set_max_log_file_size(&mut self, size: usize) { unsafe { ffi::rocksdb_options_set_max_log_file_size(self.inner, size); } } /// Sets the time for the info log file to roll (in seconds). /// /// If specified with non-zero value, log file will be rolled /// if it has been active longer than `log_file_time_to_roll`. /// Default: 0 (disabled) pub fn set_log_file_time_to_roll(&mut self, secs: usize) { unsafe { ffi::rocksdb_options_set_log_file_time_to_roll(self.inner, secs); } } /// Controls the recycling of log files. /// /// If non-zero, previously written log files will be reused for new logs, /// overwriting the old data. The value indicates how many such files we will /// keep around at any point in time for later use. This is more efficient /// because the blocks are already allocated and fdatasync does not need to /// update the inode after each write. /// /// Default: 0 /// /// # Examples /// /// ``` /// use rocksdb::Options; /// /// let mut options = Options::default(); /// options.set_recycle_log_file_num(5); /// ``` pub fn set_recycle_log_file_num(&mut self, num: usize) { unsafe { ffi::rocksdb_options_set_recycle_log_file_num(self.inner, num); } } /// Sets the threshold at which all writes will be slowed down to at least delayed_write_rate if estimated /// bytes needed to be compaction exceed this threshold. /// /// Default: 64GB pub fn set_soft_pending_compaction_bytes_limit(&mut self, limit: usize) { unsafe { ffi::rocksdb_options_set_soft_pending_compaction_bytes_limit(self.inner, limit); } } /// Sets the bytes threshold at which all writes are stopped if estimated bytes needed to be compaction exceed /// this threshold. /// /// Default: 256GB pub fn set_hard_pending_compaction_bytes_limit(&mut self, limit: usize) { unsafe { ffi::rocksdb_options_set_hard_pending_compaction_bytes_limit(self.inner, limit); } } /// Sets the size of one block in arena memory allocation. /// /// If <= 0, a proper value is automatically calculated (usually 1/10 of /// writer_buffer_size). /// /// Default: 0 pub fn set_arena_block_size(&mut self, size: usize) { unsafe { ffi::rocksdb_options_set_arena_block_size(self.inner, size); } } /// If true, then print malloc stats together with rocksdb.stats when printing to LOG. /// /// Default: false pub fn set_dump_malloc_stats(&mut self, enabled: bool) { unsafe { ffi::rocksdb_options_set_dump_malloc_stats(self.inner, c_uchar::from(enabled)); } } /// Enable whole key bloom filter in memtable. Note this will only take effect /// if memtable_prefix_bloom_size_ratio is not 0. Enabling whole key filtering /// can potentially reduce CPU usage for point-look-ups. /// /// Default: false (disable) /// /// Dynamically changeable through SetOptions() API pub fn set_memtable_whole_key_filtering(&mut self, whole_key_filter: bool) { unsafe { ffi::rocksdb_options_set_memtable_whole_key_filtering( self.inner, c_uchar::from(whole_key_filter), ); } } /// Enable the use of key-value separation. /// /// More details can be found here: [Integrated BlobDB](http://rocksdb.org/blog/2021/05/26/integrated-blob-db.html). /// /// Default: false (disable) /// /// Dynamically changeable through SetOptions() API pub fn set_enable_blob_files(&mut self, val: bool) { unsafe { ffi::rocksdb_options_set_enable_blob_files(self.inner, u8::from(val)); } } /// Sets the minimum threshold value at or above which will be written /// to blob files during flush or compaction. /// /// Dynamically changeable through SetOptions() API pub fn set_min_blob_size(&mut self, val: u64) { unsafe { ffi::rocksdb_options_set_min_blob_size(self.inner, val); } } /// Sets the size limit for blob files. /// /// Dynamically changeable through SetOptions() API pub fn set_blob_file_size(&mut self, val: u64) { unsafe { ffi::rocksdb_options_set_blob_file_size(self.inner, val); } } /// Sets the blob compression type. All blob files use the same /// compression type. /// /// Dynamically changeable through SetOptions() API pub fn set_blob_compression_type(&mut self, val: DBCompressionType) { unsafe { ffi::rocksdb_options_set_blob_compression_type(self.inner, val as _); } } /// If this is set to true RocksDB will actively relocate valid blobs from the oldest blob files /// as they are encountered during compaction. /// /// Dynamically changeable through SetOptions() API pub fn set_enable_blob_gc(&mut self, val: bool) { unsafe { ffi::rocksdb_options_set_enable_blob_gc(self.inner, u8::from(val)); } } /// Sets the threshold that the GC logic uses to determine which blob files should be considered “old.” /// /// For example, the default value of 0.25 signals to RocksDB that blobs residing in the /// oldest 25% of blob files should be relocated by GC. This parameter can be tuned to adjust /// the trade-off between write amplification and space amplification. /// /// Dynamically changeable through SetOptions() API pub fn set_blob_gc_age_cutoff(&mut self, val: c_double) { unsafe { ffi::rocksdb_options_set_blob_gc_age_cutoff(self.inner, val); } } /// Sets the blob GC force threshold. /// /// Dynamically changeable through SetOptions() API pub fn set_blob_gc_force_threshold(&mut self, val: c_double) { unsafe { ffi::rocksdb_options_set_blob_gc_force_threshold(self.inner, val); } } /// Sets the blob compaction read ahead size. /// /// Dynamically changeable through SetOptions() API pub fn set_blob_compaction_readahead_size(&mut self, val: u64) { unsafe { ffi::rocksdb_options_set_blob_compaction_readahead_size(self.inner, val); } } /// Sets the blob cache. /// /// Using a dedicated object for blobs and using the same object for the block and blob caches /// are both supported. In the latter case, note that blobs are less valuable from a caching /// perspective than SST blocks, and some cache implementations have configuration options that /// can be used to prioritize items accordingly (see Cache::Priority and /// LRUCacheOptions::{high,low}_pri_pool_ratio). /// /// Default: disabled pub fn set_blob_cache(&mut self, cache: &Cache) { unsafe { ffi::rocksdb_options_set_blob_cache(self.inner, cache.0.inner.as_ptr()); } self.outlive.blob_cache = Some(cache.clone()); } /// Set this option to true during creation of database if you want /// to be able to ingest behind (call IngestExternalFile() skipping keys /// that already exist, rather than overwriting matching keys). /// Setting this option to true has the following effects: /// 1) Disable some internal optimizations around SST file compression. /// 2) Reserve the last level for ingested files only. /// 3) Compaction will not include any file from the last level. /// Note that only Universal Compaction supports allow_ingest_behind. /// `num_levels` should be >= 3 if this option is turned on. /// /// DEFAULT: false /// Immutable. pub fn set_allow_ingest_behind(&mut self, val: bool) { unsafe { ffi::rocksdb_options_set_allow_ingest_behind(self.inner, c_uchar::from(val)); } } // A factory of a table property collector that marks an SST // file as need-compaction when it observe at least "D" deletion // entries in any "N" consecutive entries, or the ratio of tombstone // entries >= deletion_ratio. // // `window_size`: is the sliding window size "N" // `num_dels_trigger`: is the deletion trigger "D" // `deletion_ratio`: if <= 0 or > 1, disable triggering compaction based on // deletion ratio. pub fn add_compact_on_deletion_collector_factory( &mut self, window_size: size_t, num_dels_trigger: size_t, deletion_ratio: f64, ) { unsafe { ffi::rocksdb_options_add_compact_on_deletion_collector_factory_del_ratio( self.inner, window_size, num_dels_trigger, deletion_ratio, ); } } /// /// Write buffer manager helps users control the total memory used by memtables across multiple column families and/or DB instances. /// Users can enable this control by 2 ways: /// /// 1- Limit the total memtable usage across multiple column families and DBs under a threshold. /// 2- Cost the memtable memory usage to block cache so that memory of RocksDB can be capped by the single limit. /// The usage of a write buffer manager is similar to rate_limiter and sst_file_manager. /// Users can create one write buffer manager object and pass it to all the options of column families or DBs whose memtable size they want to be controlled by this object. pub fn set_write_buffer_manager(&mut self, write_buffer_manager: &WriteBufferManager) { unsafe { ffi::rocksdb_options_set_write_buffer_manager( self.inner, write_buffer_manager.0.inner.as_ptr(), ); } self.outlive.write_buffer_manager = Some(write_buffer_manager.clone()); } /// If true, working thread may avoid doing unnecessary and long-latency /// operation (such as deleting obsolete files directly or deleting memtable) /// and will instead schedule a background job to do it. /// /// Use it if you're latency-sensitive. /// /// Default: false (disabled) pub fn set_avoid_unnecessary_blocking_io(&mut self, val: bool) { unsafe { ffi::rocksdb_options_set_avoid_unnecessary_blocking_io(self.inner, u8::from(val)); } } /// If true, the log numbers and sizes of the synced WALs are tracked /// in MANIFEST. During DB recovery, if a synced WAL is missing /// from disk, or the WAL's size does not match the recorded size in /// MANIFEST, an error will be reported and the recovery will be aborted. /// /// This is one additional protection against WAL corruption besides the /// per-WAL-entry checksum. /// /// Note that this option does not work with secondary instance. /// Currently, only syncing closed WALs are tracked. Calling `DB::SyncWAL()`, /// etc. or writing with `WriteOptions::sync=true` to sync the live WAL is not /// tracked for performance/efficiency reasons. /// /// See: /// /// Default: false (disabled) pub fn set_track_and_verify_wals_in_manifest(&mut self, val: bool) { unsafe { ffi::rocksdb_options_set_track_and_verify_wals_in_manifest(self.inner, u8::from(val)); } } /// Returns the value of the `track_and_verify_wals_in_manifest` option. pub fn get_track_and_verify_wals_in_manifest(&self) -> bool { let val_u8 = unsafe { ffi::rocksdb_options_get_track_and_verify_wals_in_manifest(self.inner) }; val_u8 != 0 } /// The DB unique ID can be saved in the DB manifest (preferred, this option) /// or an IDENTITY file (historical, deprecated), or both. If this option is /// set to false (old behavior), then `write_identity_file` must be set to true. /// The manifest is preferred because /// 1. The IDENTITY file is not checksummed, so it is not as safe against /// corruption. /// 2. The IDENTITY file may or may not be copied with the DB (e.g. not /// copied by BackupEngine), so is not reliable for the provenance of a DB. /// This option might eventually be obsolete and removed as Identity files /// are phased out. /// /// Default: true (enabled) pub fn set_write_dbid_to_manifest(&mut self, val: bool) { unsafe { ffi::rocksdb_options_set_write_dbid_to_manifest(self.inner, u8::from(val)); } } /// Returns the value of the `write_dbid_to_manifest` option. pub fn get_write_dbid_to_manifest(&self) -> bool { let val_u8 = unsafe { ffi::rocksdb_options_get_write_dbid_to_manifest(self.inner) }; val_u8 != 0 } } impl Default for Options { fn default() -> Self { unsafe { let opts = ffi::rocksdb_options_create(); assert!(!opts.is_null(), "Could not create RocksDB options"); Self { inner: opts, outlive: OptionsMustOutliveDB::default(), } } } } impl FlushOptions { pub fn new() -> FlushOptions { FlushOptions::default() } /// Waits until the flush is done. /// /// Default: true /// /// # Examples /// /// ``` /// use rocksdb::FlushOptions; /// /// let mut options = FlushOptions::default(); /// options.set_wait(false); /// ``` pub fn set_wait(&mut self, wait: bool) { unsafe { ffi::rocksdb_flushoptions_set_wait(self.inner, c_uchar::from(wait)); } } } impl Default for FlushOptions { fn default() -> Self { let flush_opts = unsafe { ffi::rocksdb_flushoptions_create() }; assert!( !flush_opts.is_null(), "Could not create RocksDB flush options" ); Self { inner: flush_opts } } } impl WriteOptions { pub fn new() -> WriteOptions { WriteOptions::default() } /// Sets the sync mode. If true, the write will be flushed /// from the operating system buffer cache before the write is considered complete. /// If this flag is true, writes will be slower. /// /// Default: false pub fn set_sync(&mut self, sync: bool) { unsafe { ffi::rocksdb_writeoptions_set_sync(self.inner, c_uchar::from(sync)); } } /// Sets whether WAL should be active or not. /// If true, writes will not first go to the write ahead log, /// and the write may got lost after a crash. /// /// Default: false pub fn disable_wal(&mut self, disable: bool) { unsafe { ffi::rocksdb_writeoptions_disable_WAL(self.inner, c_int::from(disable)); } } /// If true and if user is trying to write to column families that don't exist (they were dropped), /// ignore the write (don't return an error). If there are multiple writes in a WriteBatch, /// other writes will succeed. /// /// Default: false pub fn set_ignore_missing_column_families(&mut self, ignore: bool) { unsafe { ffi::rocksdb_writeoptions_set_ignore_missing_column_families( self.inner, c_uchar::from(ignore), ); } } /// If true and we need to wait or sleep for the write request, fails /// immediately with Status::Incomplete(). /// /// Default: false pub fn set_no_slowdown(&mut self, no_slowdown: bool) { unsafe { ffi::rocksdb_writeoptions_set_no_slowdown(self.inner, c_uchar::from(no_slowdown)); } } /// If true, this write request is of lower priority if compaction is /// behind. In this case, no_slowdown = true, the request will be cancelled /// immediately with Status::Incomplete() returned. Otherwise, it will be /// slowed down. The slowdown value is determined by RocksDB to guarantee /// it introduces minimum impacts to high priority writes. /// /// Default: false pub fn set_low_pri(&mut self, v: bool) { unsafe { ffi::rocksdb_writeoptions_set_low_pri(self.inner, c_uchar::from(v)); } } /// If true, writebatch will maintain the last insert positions of each /// memtable as hints in concurrent write. It can improve write performance /// in concurrent writes if keys in one writebatch are sequential. In /// non-concurrent writes (when concurrent_memtable_writes is false) this /// option will be ignored. /// /// Default: false pub fn set_memtable_insert_hint_per_batch(&mut self, v: bool) { unsafe { ffi::rocksdb_writeoptions_set_memtable_insert_hint_per_batch( self.inner, c_uchar::from(v), ); } } } impl Default for WriteOptions { fn default() -> Self { let write_opts = unsafe { ffi::rocksdb_writeoptions_create() }; assert!( !write_opts.is_null(), "Could not create RocksDB write options" ); Self { inner: write_opts } } } impl LruCacheOptions { /// Capacity of the cache, in the same units as the `charge` of each entry. /// This is typically measured in bytes, but can be a different unit if using /// kDontChargeCacheMetadata. pub fn set_capacity(&mut self, cap: usize) { unsafe { ffi::rocksdb_lru_cache_options_set_capacity(self.inner, cap); } } /// Cache is sharded into 2^num_shard_bits shards, by hash of key. /// If < 0, a good default is chosen based on the capacity and the /// implementation. (Mutex-based implementations are much more reliant /// on many shards for parallel scalability.) pub fn set_num_shard_bits(&mut self, val: c_int) { unsafe { ffi::rocksdb_lru_cache_options_set_num_shard_bits(self.inner, val); } } } impl Default for LruCacheOptions { fn default() -> Self { let inner = unsafe { ffi::rocksdb_lru_cache_options_create() }; assert!( !inner.is_null(), "Could not create RocksDB LRU cache options" ); Self { inner } } } #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] #[repr(i32)] pub enum ReadTier { /// Reads data in memtable, block cache, OS cache or storage. All = 0, /// Reads data in memtable or block cache. BlockCache, /// Reads persisted data. When WAL is disabled, this option will skip data in memtable. Persisted, /// Reads data in memtable. Used for memtable only iterators. Memtable, } #[repr(i32)] pub enum CompactionPri { /// Slightly prioritize larger files by size compensated by #deletes ByCompensatedSize = 0, /// First compact files whose data's latest update time is oldest. /// Try this if you only update some hot keys in small ranges. OldestLargestSeqFirst = 1, /// First compact files whose range hasn't been compacted to the next level /// for the longest. If your updates are random across the key space, /// write amplification is slightly better with this option. OldestSmallestSeqFirst = 2, /// First compact files whose ratio between overlapping size in next level /// and its size is the smallest. It in many cases can optimize write amplification. MinOverlappingRatio = 3, /// Keeps a cursor(s) of the successor of the file (key range) was/were /// compacted before, and always picks the next files (key range) in that /// level. The file picking process will cycle through all the files in a /// round-robin manner. RoundRobin = 4, } impl ReadOptions { // TODO add snapshot setting here // TODO add snapshot wrapper structs with proper destructors; // that struct needs an "iterator" impl too. /// Specify whether the "data block"/"index block"/"filter block" /// read for this iteration should be cached in memory? /// Callers may wish to set this field to false for bulk scans. /// /// Default: true pub fn fill_cache(&mut self, v: bool) { unsafe { ffi::rocksdb_readoptions_set_fill_cache(self.inner, c_uchar::from(v)); } } /// Sets the snapshot which should be used for the read. /// The snapshot must belong to the DB that is being read and must /// not have been released. pub fn set_snapshot(&mut self, snapshot: &SnapshotWithThreadMode) { unsafe { ffi::rocksdb_readoptions_set_snapshot(self.inner, snapshot.inner); } } /// Sets the lower bound for an iterator. pub fn set_iterate_lower_bound>>(&mut self, key: K) { self.set_lower_bound_impl(Some(key.into())); } /// Sets the upper bound for an iterator. /// The upper bound itself is not included on the iteration result. pub fn set_iterate_upper_bound>>(&mut self, key: K) { self.set_upper_bound_impl(Some(key.into())); } /// Sets lower and upper bounds based on the provided range. This is /// similar to setting lower and upper bounds separately except that it also /// allows either bound to be reset. /// /// The argument can be a regular Rust range, e.g. `lower..upper`. However, /// since RocksDB upper bound is always excluded (i.e. range can never be /// fully closed) inclusive ranges (`lower..=upper` and `..=upper`) are not /// supported. For example: /// /// ``` /// let mut options = rocksdb::ReadOptions::default(); /// options.set_iterate_range("xy".as_bytes().."xz".as_bytes()); /// ``` /// /// In addition, [`crate::PrefixRange`] can be used to specify a range of /// keys with a given prefix. In particular, the above example is /// equivalent to: /// /// ``` /// let mut options = rocksdb::ReadOptions::default(); /// options.set_iterate_range(rocksdb::PrefixRange("xy".as_bytes())); /// ``` /// /// Note that setting range using this method is separate to using prefix /// iterators. Prefix iterators use prefix extractor configured for /// a column family. Setting bounds via [`crate::PrefixRange`] is more akin /// to using manual prefix. /// /// Using this method clears any previously set bounds. In other words, the /// bounds can be reset by setting the range to `..` as in: /// /// ``` /// let mut options = rocksdb::ReadOptions::default(); /// options.set_iterate_range(..); /// ``` pub fn set_iterate_range(&mut self, range: impl crate::IterateBounds) { let (lower, upper) = range.into_bounds(); self.set_lower_bound_impl(lower); self.set_upper_bound_impl(upper); } fn set_lower_bound_impl(&mut self, bound: Option>) { let (ptr, len) = if let Some(ref bound) = bound { (bound.as_ptr() as *const c_char, bound.len()) } else if self.iterate_lower_bound.is_some() { (std::ptr::null(), 0) } else { return; }; self.iterate_lower_bound = bound; unsafe { ffi::rocksdb_readoptions_set_iterate_lower_bound(self.inner, ptr, len); } } fn set_upper_bound_impl(&mut self, bound: Option>) { let (ptr, len) = if let Some(ref bound) = bound { (bound.as_ptr() as *const c_char, bound.len()) } else if self.iterate_upper_bound.is_some() { (std::ptr::null(), 0) } else { return; }; self.iterate_upper_bound = bound; unsafe { ffi::rocksdb_readoptions_set_iterate_upper_bound(self.inner, ptr, len); } } /// Specify if this read request should process data that ALREADY /// resides on a particular cache. If the required data is not /// found at the specified cache, then Status::Incomplete is returned. /// /// Default: ::All pub fn set_read_tier(&mut self, tier: ReadTier) { unsafe { ffi::rocksdb_readoptions_set_read_tier(self.inner, tier as c_int); } } /// Enforce that the iterator only iterates over the same /// prefix as the seek. /// This option is effective only for prefix seeks, i.e. prefix_extractor is /// non-null for the column family and total_order_seek is false. Unlike /// iterate_upper_bound, prefix_same_as_start only works within a prefix /// but in both directions. /// /// Default: false pub fn set_prefix_same_as_start(&mut self, v: bool) { unsafe { ffi::rocksdb_readoptions_set_prefix_same_as_start(self.inner, c_uchar::from(v)); } } /// Enable a total order seek regardless of index format (e.g. hash index) /// used in the table. Some table format (e.g. plain table) may not support /// this option. /// /// If true when calling Get(), we also skip prefix bloom when reading from /// block based table. It provides a way to read existing data after /// changing implementation of prefix extractor. pub fn set_total_order_seek(&mut self, v: bool) { unsafe { ffi::rocksdb_readoptions_set_total_order_seek(self.inner, c_uchar::from(v)); } } /// Sets a threshold for the number of keys that can be skipped /// before failing an iterator seek as incomplete. The default value of 0 should be used to /// never fail a request as incomplete, even on skipping too many keys. /// /// Default: 0 pub fn set_max_skippable_internal_keys(&mut self, num: u64) { unsafe { ffi::rocksdb_readoptions_set_max_skippable_internal_keys(self.inner, num); } } /// If true, when PurgeObsoleteFile is called in CleanupIteratorState, we schedule a background job /// in the flush job queue and delete obsolete files in background. /// /// Default: false pub fn set_background_purge_on_iterator_cleanup(&mut self, v: bool) { unsafe { ffi::rocksdb_readoptions_set_background_purge_on_iterator_cleanup( self.inner, c_uchar::from(v), ); } } /// If true, keys deleted using the DeleteRange() API will be visible to /// readers until they are naturally deleted during compaction. This improves /// read performance in DBs with many range deletions. /// /// Default: false pub fn set_ignore_range_deletions(&mut self, v: bool) { unsafe { ffi::rocksdb_readoptions_set_ignore_range_deletions(self.inner, c_uchar::from(v)); } } /// If true, all data read from underlying storage will be /// verified against corresponding checksums. /// /// Default: true pub fn set_verify_checksums(&mut self, v: bool) { unsafe { ffi::rocksdb_readoptions_set_verify_checksums(self.inner, c_uchar::from(v)); } } /// If non-zero, an iterator will create a new table reader which /// performs reads of the given size. Using a large size (> 2MB) can /// improve the performance of forward iteration on spinning disks. /// Default: 0 /// /// ``` /// use rocksdb::{ReadOptions}; /// /// let mut opts = ReadOptions::default(); /// opts.set_readahead_size(4_194_304); // 4mb /// ``` pub fn set_readahead_size(&mut self, v: usize) { unsafe { ffi::rocksdb_readoptions_set_readahead_size(self.inner, v as size_t); } } /// If auto_readahead_size is set to true, it will auto tune the readahead_size /// during scans internally. /// For this feature to be enabled, iterate_upper_bound must also be specified. /// /// NOTE: - Recommended for forward Scans only. /// - If there is a backward scans, this option will be /// disabled internally and won't be enabled again if the forward scan /// is issued again. /// /// Default: true pub fn set_auto_readahead_size(&mut self, v: bool) { unsafe { ffi::rocksdb_readoptions_set_auto_readahead_size(self.inner, c_uchar::from(v)); } } /// If true, create a tailing iterator. Note that tailing iterators /// only support moving in the forward direction. Iterating in reverse /// or seek_to_last are not supported. pub fn set_tailing(&mut self, v: bool) { unsafe { ffi::rocksdb_readoptions_set_tailing(self.inner, c_uchar::from(v)); } } /// Specifies the value of "pin_data". If true, it keeps the blocks /// loaded by the iterator pinned in memory as long as the iterator is not deleted, /// If used when reading from tables created with /// BlockBasedTableOptions::use_delta_encoding = false, /// Iterator's property "rocksdb.iterator.is-key-pinned" is guaranteed to /// return 1. /// /// Default: false pub fn set_pin_data(&mut self, v: bool) { unsafe { ffi::rocksdb_readoptions_set_pin_data(self.inner, c_uchar::from(v)); } } /// Asynchronously prefetch some data. /// /// Used for sequential reads and internal automatic prefetching. /// /// Default: `false` pub fn set_async_io(&mut self, v: bool) { unsafe { ffi::rocksdb_readoptions_set_async_io(self.inner, c_uchar::from(v)); } } /// Timestamp of operation. Read should return the latest data visible to the /// specified timestamp. All timestamps of the same database must be of the /// same length and format. The user is responsible for providing a customized /// compare function via Comparator to order tuples. /// For iterator, iter_start_ts is the lower bound (older) and timestamp /// serves as the upper bound. Versions of the same record that fall in /// the timestamp range will be returned. If iter_start_ts is nullptr, /// only the most recent version visible to timestamp is returned. /// The user-specified timestamp feature is still under active development, /// and the API is subject to change. pub fn set_timestamp>>(&mut self, ts: S) { self.set_timestamp_impl(Some(ts.into())); } fn set_timestamp_impl(&mut self, ts: Option>) { let (ptr, len) = if let Some(ref ts) = ts { (ts.as_ptr() as *const c_char, ts.len()) } else if self.timestamp.is_some() { // The stored timestamp is a `Some` but we're updating it to a `None`. // This means to cancel a previously set timestamp. // To do this, use a null pointer and zero length. (std::ptr::null(), 0) } else { return; }; self.timestamp = ts; unsafe { ffi::rocksdb_readoptions_set_timestamp(self.inner, ptr, len); } } /// See `set_timestamp` pub fn set_iter_start_ts>>(&mut self, ts: S) { self.set_iter_start_ts_impl(Some(ts.into())); } fn set_iter_start_ts_impl(&mut self, ts: Option>) { let (ptr, len) = if let Some(ref ts) = ts { (ts.as_ptr() as *const c_char, ts.len()) } else if self.timestamp.is_some() { (std::ptr::null(), 0) } else { return; }; self.iter_start_ts = ts; unsafe { ffi::rocksdb_readoptions_set_iter_start_ts(self.inner, ptr, len); } } } impl Default for ReadOptions { fn default() -> Self { unsafe { Self { inner: ffi::rocksdb_readoptions_create(), timestamp: None, iter_start_ts: None, iterate_upper_bound: None, iterate_lower_bound: None, } } } } impl IngestExternalFileOptions { /// Can be set to true to move the files instead of copying them. pub fn set_move_files(&mut self, v: bool) { unsafe { ffi::rocksdb_ingestexternalfileoptions_set_move_files(self.inner, c_uchar::from(v)); } } /// If set to false, an ingested file keys could appear in existing snapshots /// that where created before the file was ingested. pub fn set_snapshot_consistency(&mut self, v: bool) { unsafe { ffi::rocksdb_ingestexternalfileoptions_set_snapshot_consistency( self.inner, c_uchar::from(v), ); } } /// If set to false, IngestExternalFile() will fail if the file key range /// overlaps with existing keys or tombstones in the DB. pub fn set_allow_global_seqno(&mut self, v: bool) { unsafe { ffi::rocksdb_ingestexternalfileoptions_set_allow_global_seqno( self.inner, c_uchar::from(v), ); } } /// If set to false and the file key range overlaps with the memtable key range /// (memtable flush required), IngestExternalFile will fail. pub fn set_allow_blocking_flush(&mut self, v: bool) { unsafe { ffi::rocksdb_ingestexternalfileoptions_set_allow_blocking_flush( self.inner, c_uchar::from(v), ); } } /// Set to true if you would like duplicate keys in the file being ingested /// to be skipped rather than overwriting existing data under that key. /// Usecase: back-fill of some historical data in the database without /// over-writing existing newer version of data. /// This option could only be used if the DB has been running /// with allow_ingest_behind=true since the dawn of time. /// All files will be ingested at the bottommost level with seqno=0. pub fn set_ingest_behind(&mut self, v: bool) { unsafe { ffi::rocksdb_ingestexternalfileoptions_set_ingest_behind(self.inner, c_uchar::from(v)); } } } impl Default for IngestExternalFileOptions { fn default() -> Self { unsafe { Self { inner: ffi::rocksdb_ingestexternalfileoptions_create(), } } } } /// Used by BlockBasedOptions::set_index_type. pub enum BlockBasedIndexType { /// A space efficient index block that is optimized for /// binary-search-based index. BinarySearch, /// The hash index, if enabled, will perform a hash lookup if /// a prefix extractor has been provided through Options::set_prefix_extractor. HashSearch, /// A two-level index implementation. Both levels are binary search indexes. TwoLevelIndexSearch, } /// Used by BlockBasedOptions::set_data_block_index_type. #[repr(C)] pub enum DataBlockIndexType { /// Use binary search when performing point lookup for keys in data blocks. /// This is the default. BinarySearch = 0, /// Appends a compact hash table to the end of the data block for efficient indexing. Backwards /// compatible with databases created without this feature. Once turned on, existing data will /// be gradually converted to the hash index format. BinaryAndHash = 1, } /// Defines the underlying memtable implementation. /// See official [wiki](https://github.com/facebook/rocksdb/wiki/MemTable) for more information. pub enum MemtableFactory { Vector, HashSkipList { bucket_count: usize, height: i32, branching_factor: i32, }, HashLinkList { bucket_count: usize, }, } /// Used by BlockBasedOptions::set_checksum_type. pub enum ChecksumType { NoChecksum = 0, CRC32c = 1, XXHash = 2, XXHash64 = 3, XXH3 = 4, // Supported since RocksDB 6.27 } /// Used in [`PlainTableFactoryOptions`]. #[derive(Debug, Copy, Clone, PartialEq, Eq, Default)] pub enum KeyEncodingType { /// Always write full keys. #[default] Plain = 0, /// Find opportunities to write the same prefix for multiple rows. Prefix = 1, } /// Used with DBOptions::set_plain_table_factory. /// See official [wiki](https://github.com/facebook/rocksdb/wiki/PlainTable-Format) for more /// information. /// /// Defaults: /// user_key_length: 0 (variable length) /// bloom_bits_per_key: 10 /// hash_table_ratio: 0.75 /// index_sparseness: 16 /// huge_page_tlb_size: 0 /// encoding_type: KeyEncodingType::Plain /// full_scan_mode: false /// store_index_in_file: false pub struct PlainTableFactoryOptions { pub user_key_length: u32, pub bloom_bits_per_key: i32, pub hash_table_ratio: f64, pub index_sparseness: usize, pub huge_page_tlb_size: usize, pub encoding_type: KeyEncodingType, pub full_scan_mode: bool, pub store_index_in_file: bool, } #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub enum DBCompressionType { None = ffi::rocksdb_no_compression as isize, Snappy = ffi::rocksdb_snappy_compression as isize, Zlib = ffi::rocksdb_zlib_compression as isize, Bz2 = ffi::rocksdb_bz2_compression as isize, Lz4 = ffi::rocksdb_lz4_compression as isize, Lz4hc = ffi::rocksdb_lz4hc_compression as isize, Zstd = ffi::rocksdb_zstd_compression as isize, } #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub enum DBCompactionStyle { Level = ffi::rocksdb_level_compaction as isize, Universal = ffi::rocksdb_universal_compaction as isize, Fifo = ffi::rocksdb_fifo_compaction as isize, } #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub enum DBRecoveryMode { TolerateCorruptedTailRecords = ffi::rocksdb_tolerate_corrupted_tail_records_recovery as isize, AbsoluteConsistency = ffi::rocksdb_absolute_consistency_recovery as isize, PointInTime = ffi::rocksdb_point_in_time_recovery as isize, SkipAnyCorruptedRecord = ffi::rocksdb_skip_any_corrupted_records_recovery as isize, } pub struct FifoCompactOptions { pub(crate) inner: *mut ffi::rocksdb_fifo_compaction_options_t, } impl Default for FifoCompactOptions { fn default() -> Self { let opts = unsafe { ffi::rocksdb_fifo_compaction_options_create() }; assert!( !opts.is_null(), "Could not create RocksDB Fifo Compaction Options" ); Self { inner: opts } } } impl Drop for FifoCompactOptions { fn drop(&mut self) { unsafe { ffi::rocksdb_fifo_compaction_options_destroy(self.inner); } } } impl FifoCompactOptions { /// Sets the max table file size. /// /// Once the total sum of table files reaches this, we will delete the oldest /// table file /// /// Default: 1GB pub fn set_max_table_files_size(&mut self, nbytes: u64) { unsafe { ffi::rocksdb_fifo_compaction_options_set_max_table_files_size(self.inner, nbytes); } } } #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub enum UniversalCompactionStopStyle { Similar = ffi::rocksdb_similar_size_compaction_stop_style as isize, Total = ffi::rocksdb_total_size_compaction_stop_style as isize, } pub struct UniversalCompactOptions { pub(crate) inner: *mut ffi::rocksdb_universal_compaction_options_t, } impl Default for UniversalCompactOptions { fn default() -> Self { let opts = unsafe { ffi::rocksdb_universal_compaction_options_create() }; assert!( !opts.is_null(), "Could not create RocksDB Universal Compaction Options" ); Self { inner: opts } } } impl Drop for UniversalCompactOptions { fn drop(&mut self) { unsafe { ffi::rocksdb_universal_compaction_options_destroy(self.inner); } } } impl UniversalCompactOptions { /// Sets the percentage flexibility while comparing file size. /// If the candidate file(s) size is 1% smaller than the next file's size, /// then include next file into this candidate set. /// /// Default: 1 pub fn set_size_ratio(&mut self, ratio: c_int) { unsafe { ffi::rocksdb_universal_compaction_options_set_size_ratio(self.inner, ratio); } } /// Sets the minimum number of files in a single compaction run. /// /// Default: 2 pub fn set_min_merge_width(&mut self, num: c_int) { unsafe { ffi::rocksdb_universal_compaction_options_set_min_merge_width(self.inner, num); } } /// Sets the maximum number of files in a single compaction run. /// /// Default: UINT_MAX pub fn set_max_merge_width(&mut self, num: c_int) { unsafe { ffi::rocksdb_universal_compaction_options_set_max_merge_width(self.inner, num); } } /// sets the size amplification. /// /// It is defined as the amount (in percentage) of /// additional storage needed to store a single byte of data in the database. /// For example, a size amplification of 2% means that a database that /// contains 100 bytes of user-data may occupy upto 102 bytes of /// physical storage. By this definition, a fully compacted database has /// a size amplification of 0%. Rocksdb uses the following heuristic /// to calculate size amplification: it assumes that all files excluding /// the earliest file contribute to the size amplification. /// /// Default: 200, which means that a 100 byte database could require upto 300 bytes of storage. pub fn set_max_size_amplification_percent(&mut self, v: c_int) { unsafe { ffi::rocksdb_universal_compaction_options_set_max_size_amplification_percent( self.inner, v, ); } } /// Sets the percentage of compression size. /// /// If this option is set to be -1, all the output files /// will follow compression type specified. /// /// If this option is not negative, we will try to make sure compressed /// size is just above this value. In normal cases, at least this percentage /// of data will be compressed. /// When we are compacting to a new file, here is the criteria whether /// it needs to be compressed: assuming here are the list of files sorted /// by generation time: /// A1...An B1...Bm C1...Ct /// where A1 is the newest and Ct is the oldest, and we are going to compact /// B1...Bm, we calculate the total size of all the files as total_size, as /// well as the total size of C1...Ct as total_C, the compaction output file /// will be compressed iff /// total_C / total_size < this percentage /// /// Default: -1 pub fn set_compression_size_percent(&mut self, v: c_int) { unsafe { ffi::rocksdb_universal_compaction_options_set_compression_size_percent(self.inner, v); } } /// Sets the algorithm used to stop picking files into a single compaction run. /// /// Default: ::Total pub fn set_stop_style(&mut self, style: UniversalCompactionStopStyle) { unsafe { ffi::rocksdb_universal_compaction_options_set_stop_style(self.inner, style as c_int); } } } #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] #[repr(u8)] pub enum BottommostLevelCompaction { /// Skip bottommost level compaction Skip = 0, /// Only compact bottommost level if there is a compaction filter /// This is the default option IfHaveCompactionFilter, /// Always compact bottommost level Force, /// Always compact bottommost level but in bottommost level avoid /// double-compacting files created in the same compaction ForceOptimized, } pub struct CompactOptions { pub(crate) inner: *mut ffi::rocksdb_compactoptions_t, full_history_ts_low: Option>, } impl Default for CompactOptions { fn default() -> Self { let opts = unsafe { ffi::rocksdb_compactoptions_create() }; assert!(!opts.is_null(), "Could not create RocksDB Compact Options"); Self { inner: opts, full_history_ts_low: None, } } } impl Drop for CompactOptions { fn drop(&mut self) { unsafe { ffi::rocksdb_compactoptions_destroy(self.inner); } } } impl CompactOptions { /// If more than one thread calls manual compaction, /// only one will actually schedule it while the other threads will simply wait /// for the scheduled manual compaction to complete. If exclusive_manual_compaction /// is set to true, the call will disable scheduling of automatic compaction jobs /// and wait for existing automatic compaction jobs to finish. pub fn set_exclusive_manual_compaction(&mut self, v: bool) { unsafe { ffi::rocksdb_compactoptions_set_exclusive_manual_compaction( self.inner, c_uchar::from(v), ); } } /// Sets bottommost level compaction. pub fn set_bottommost_level_compaction(&mut self, lvl: BottommostLevelCompaction) { unsafe { ffi::rocksdb_compactoptions_set_bottommost_level_compaction(self.inner, lvl as c_uchar); } } /// If true, compacted files will be moved to the minimum level capable /// of holding the data or given level (specified non-negative target_level). pub fn set_change_level(&mut self, v: bool) { unsafe { ffi::rocksdb_compactoptions_set_change_level(self.inner, c_uchar::from(v)); } } /// If change_level is true and target_level have non-negative value, compacted /// files will be moved to target_level. pub fn set_target_level(&mut self, lvl: c_int) { unsafe { ffi::rocksdb_compactoptions_set_target_level(self.inner, lvl); } } /// Set user-defined timestamp low bound, the data with older timestamp than /// low bound maybe GCed by compaction. Default: nullptr pub fn set_full_history_ts_low>>(&mut self, ts: S) { self.set_full_history_ts_low_impl(Some(ts.into())); } fn set_full_history_ts_low_impl(&mut self, ts: Option>) { let (ptr, len) = if let Some(ref ts) = ts { (ts.as_ptr() as *mut c_char, ts.len()) } else if self.full_history_ts_low.is_some() { (std::ptr::null::>() as *mut c_char, 0) } else { return; }; self.full_history_ts_low = ts; unsafe { ffi::rocksdb_compactoptions_set_full_history_ts_low(self.inner, ptr, len); } } } pub struct WaitForCompactOptions { pub(crate) inner: *mut ffi::rocksdb_wait_for_compact_options_t, } impl Default for WaitForCompactOptions { fn default() -> Self { let opts = unsafe { ffi::rocksdb_wait_for_compact_options_create() }; assert!( !opts.is_null(), "Could not create RocksDB Wait For Compact Options" ); Self { inner: opts } } } impl Drop for WaitForCompactOptions { fn drop(&mut self) { unsafe { ffi::rocksdb_wait_for_compact_options_destroy(self.inner); } } } impl WaitForCompactOptions { /// If true, abort waiting if background jobs are paused. If false, /// ContinueBackgroundWork() must be called to resume the background jobs. /// Otherwise, jobs that were queued, but not scheduled yet may never finish /// and WaitForCompact() may wait indefinitely (if timeout is set, it will /// abort after the timeout). /// /// Default: false pub fn set_abort_on_pause(&mut self, v: bool) { unsafe { ffi::rocksdb_wait_for_compact_options_set_abort_on_pause(self.inner, c_uchar::from(v)); } } /// If true, flush all column families before starting to wait. /// /// Default: false pub fn set_flush(&mut self, v: bool) { unsafe { ffi::rocksdb_wait_for_compact_options_set_flush(self.inner, c_uchar::from(v)); } } /// Timeout in microseconds for waiting for compaction to complete. /// when timeout == 0, WaitForCompact() will wait as long as there's background /// work to finish. /// /// Default: 0 pub fn set_timeout(&mut self, microseconds: u64) { unsafe { ffi::rocksdb_wait_for_compact_options_set_timeout(self.inner, microseconds); } } } /// Represents a path where sst files can be put into pub struct DBPath { pub(crate) inner: *mut ffi::rocksdb_dbpath_t, } impl DBPath { /// Create a new path pub fn new>(path: P, target_size: u64) -> Result { let p = to_cpath(path.as_ref()).unwrap(); let dbpath = unsafe { ffi::rocksdb_dbpath_create(p.as_ptr(), target_size) }; if dbpath.is_null() { Err(Error::new(format!( "Could not create path for storing sst files at location: {}", path.as_ref().display() ))) } else { Ok(DBPath { inner: dbpath }) } } } impl Drop for DBPath { fn drop(&mut self) { unsafe { ffi::rocksdb_dbpath_destroy(self.inner); } } } #[cfg(test)] mod tests { use crate::db_options::WriteBufferManager; use crate::{Cache, CompactionPri, MemtableFactory, Options}; #[test] fn test_enable_statistics() { let mut opts = Options::default(); opts.enable_statistics(); opts.set_stats_dump_period_sec(60); assert!(opts.get_statistics().is_some()); let opts = Options::default(); assert!(opts.get_statistics().is_none()); } #[test] fn test_set_memtable_factory() { let mut opts = Options::default(); opts.set_memtable_factory(MemtableFactory::Vector); opts.set_memtable_factory(MemtableFactory::HashLinkList { bucket_count: 100 }); opts.set_memtable_factory(MemtableFactory::HashSkipList { bucket_count: 100, height: 4, branching_factor: 4, }); } #[test] fn test_set_stats_persist_period_sec() { let mut opts = Options::default(); opts.enable_statistics(); opts.set_stats_persist_period_sec(5); assert!(opts.get_statistics().is_some()); let opts = Options::default(); assert!(opts.get_statistics().is_none()); } #[test] fn test_set_write_buffer_manager() { let mut opts = Options::default(); let lrucache = Cache::new_lru_cache(100); let write_buffer_manager = WriteBufferManager::new_write_buffer_manager_with_cache(100, false, lrucache); assert_eq!(write_buffer_manager.get_buffer_size(), 100); assert_eq!(write_buffer_manager.get_usage(), 0); assert!(write_buffer_manager.enabled()); opts.set_write_buffer_manager(&write_buffer_manager); drop(opts); // WriteBufferManager outlives options assert!(write_buffer_manager.enabled()); } #[test] fn compaction_pri() { let mut opts = Options::default(); opts.set_compaction_pri(CompactionPri::RoundRobin); opts.create_if_missing(true); let tmp = tempfile::tempdir().unwrap(); let _db = crate::DB::open(&opts, tmp.path()).unwrap(); let options = std::fs::read_dir(tmp.path()) .unwrap() .find_map(|x| { let x = x.ok()?; x.file_name() .into_string() .unwrap() .contains("OPTIONS") .then_some(x.path()) }) .map(std::fs::read_to_string) .unwrap() .unwrap(); assert!(options.contains("compaction_pri=kRoundRobin")); } } rocksdb-0.23.0/src/db_pinnable_slice.rs000064400000000000000000000041741046102023000161140ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. use crate::{ffi, DB}; use core::ops::Deref; use libc::size_t; use std::marker::PhantomData; use std::slice; /// Wrapper around RocksDB PinnableSlice struct. /// /// With a pinnable slice, we can directly leverage in-memory data within /// RocksDB to avoid unnecessary memory copies. The struct here wraps the /// returned raw pointer and ensures proper finalization work. pub struct DBPinnableSlice<'a> { ptr: *mut ffi::rocksdb_pinnableslice_t, db: PhantomData<&'a DB>, } unsafe impl<'a> Send for DBPinnableSlice<'a> {} unsafe impl<'a> Sync for DBPinnableSlice<'a> {} impl<'a> AsRef<[u8]> for DBPinnableSlice<'a> { fn as_ref(&self) -> &[u8] { // Implement this via Deref so as not to repeat ourselves self } } impl<'a> Deref for DBPinnableSlice<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { unsafe { let mut val_len: size_t = 0; let val = ffi::rocksdb_pinnableslice_value(self.ptr, &mut val_len) as *mut u8; slice::from_raw_parts(val, val_len) } } } impl<'a> Drop for DBPinnableSlice<'a> { fn drop(&mut self) { unsafe { ffi::rocksdb_pinnableslice_destroy(self.ptr); } } } impl<'a> DBPinnableSlice<'a> { /// Used to wrap a PinnableSlice from rocksdb to avoid unnecessary memcpy /// /// # Unsafe /// Requires that the pointer must be generated by rocksdb_get_pinned pub(crate) unsafe fn from_c(ptr: *mut ffi::rocksdb_pinnableslice_t) -> Self { Self { ptr, db: PhantomData, } } } rocksdb-0.23.0/src/env.rs000064400000000000000000000117011046102023000132620ustar 00000000000000use std::sync::Arc; use libc::{self, c_int}; use crate::{ffi, Error}; /// An Env is an interface used by the rocksdb implementation to access /// operating system functionality like the filesystem etc. Callers /// may wish to provide a custom Env object when opening a database to /// get fine gain control; e.g., to rate limit file system operations. /// /// All Env implementations are safe for concurrent access from /// multiple threads without any external synchronization. /// /// Note: currently, C API behinds C++ API for various settings. /// See also: `rocksdb/include/env.h` #[derive(Clone)] pub struct Env(pub(crate) Arc); pub(crate) struct EnvWrapper { pub(crate) inner: *mut ffi::rocksdb_env_t, } impl Drop for EnvWrapper { fn drop(&mut self) { unsafe { ffi::rocksdb_env_destroy(self.inner); } } } impl Env { /// Returns default env pub fn new() -> Result { let env = unsafe { ffi::rocksdb_create_default_env() }; if env.is_null() { Err(Error::new("Could not create mem env".to_owned())) } else { Ok(Self(Arc::new(EnvWrapper { inner: env }))) } } /// Returns a new environment that stores its data in memory and delegates /// all non-file-storage tasks to base_env. pub fn mem_env() -> Result { let env = unsafe { ffi::rocksdb_create_mem_env() }; if env.is_null() { Err(Error::new("Could not create mem env".to_owned())) } else { Ok(Self(Arc::new(EnvWrapper { inner: env }))) } } /// Returns a new environment which wraps and takes ownership of the provided /// raw environment. /// /// # Safety /// /// Ownership of `env` is transferred to the returned Env, which becomes /// responsible for freeing it. The caller should forget the raw pointer /// after this call. /// /// # When would I use this? /// /// RocksDB's C++ [Env](https://github.com/facebook/rocksdb/blob/main/include/rocksdb/env.h) /// class provides many extension points for low-level database subsystems, such as file IO. /// These subsystems aren't covered within the scope of the C interface or this crate, /// but from_raw() may be used to hand a pre-instrumented Env to this crate for further use. /// pub unsafe fn from_raw(env: *mut ffi::rocksdb_env_t) -> Self { Self(Arc::new(EnvWrapper { inner: env })) } /// Sets the number of background worker threads of a specific thread pool for this environment. /// `LOW` is the default pool. /// /// Default: 1 pub fn set_background_threads(&mut self, num_threads: c_int) { unsafe { ffi::rocksdb_env_set_background_threads(self.0.inner, num_threads); } } /// Sets the size of the high priority thread pool that can be used to /// prevent compactions from stalling memtable flushes. pub fn set_high_priority_background_threads(&mut self, n: c_int) { unsafe { ffi::rocksdb_env_set_high_priority_background_threads(self.0.inner, n); } } /// Sets the size of the low priority thread pool that can be used to /// prevent compactions from stalling memtable flushes. pub fn set_low_priority_background_threads(&mut self, n: c_int) { unsafe { ffi::rocksdb_env_set_low_priority_background_threads(self.0.inner, n); } } /// Sets the size of the bottom priority thread pool that can be used to /// prevent compactions from stalling memtable flushes. pub fn set_bottom_priority_background_threads(&mut self, n: c_int) { unsafe { ffi::rocksdb_env_set_bottom_priority_background_threads(self.0.inner, n); } } /// Wait for all threads started by StartThread to terminate. pub fn join_all_threads(&mut self) { unsafe { ffi::rocksdb_env_join_all_threads(self.0.inner); } } /// Lowering IO priority for threads from the specified pool. pub fn lower_thread_pool_io_priority(&mut self) { unsafe { ffi::rocksdb_env_lower_thread_pool_io_priority(self.0.inner); } } /// Lowering IO priority for high priority thread pool. pub fn lower_high_priority_thread_pool_io_priority(&mut self) { unsafe { ffi::rocksdb_env_lower_high_priority_thread_pool_io_priority(self.0.inner); } } /// Lowering CPU priority for threads from the specified pool. pub fn lower_thread_pool_cpu_priority(&mut self) { unsafe { ffi::rocksdb_env_lower_thread_pool_cpu_priority(self.0.inner); } } /// Lowering CPU priority for high priority thread pool. pub fn lower_high_priority_thread_pool_cpu_priority(&mut self) { unsafe { ffi::rocksdb_env_lower_high_priority_thread_pool_cpu_priority(self.0.inner); } } } unsafe impl Send for EnvWrapper {} unsafe impl Sync for EnvWrapper {} rocksdb-0.23.0/src/ffi_util.rs000064400000000000000000000205041046102023000142740ustar 00000000000000// Copyright 2016 Alex Regueiro // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // use crate::{ffi, Error}; use libc::{self, c_char, c_void, size_t}; use std::ffi::{CStr, CString}; use std::path::Path; use std::ptr; pub(crate) unsafe fn from_cstr(ptr: *const c_char) -> String { let cstr = CStr::from_ptr(ptr as *const _); String::from_utf8_lossy(cstr.to_bytes()).into_owned() } pub(crate) unsafe fn raw_data(ptr: *const c_char, size: usize) -> Option> { if ptr.is_null() { None } else { let mut dst = vec![0; size]; ptr::copy_nonoverlapping(ptr as *const u8, dst.as_mut_ptr(), size); Some(dst) } } pub fn error_message(ptr: *const c_char) -> String { unsafe { let s = from_cstr(ptr); ffi::rocksdb_free(ptr as *mut c_void); s } } pub fn opt_bytes_to_ptr>(opt: Option) -> *const c_char { match opt { Some(v) => v.as_ref().as_ptr() as *const c_char, None => ptr::null(), } } pub(crate) fn to_cpath>(path: P) -> Result { match CString::new(path.as_ref().to_string_lossy().as_bytes()) { Ok(c) => Ok(c), Err(e) => Err(Error::new(format!( "Failed to convert path to CString: {e}" ))), } } macro_rules! ffi_try { ( $($function:ident)::*() ) => { ffi_try_impl!($($function)::*()) }; ( $($function:ident)::*( $arg1:expr $(, $arg:expr)* $(,)? ) ) => { ffi_try_impl!($($function)::*($arg1 $(, $arg)* ,)) }; } macro_rules! ffi_try_impl { ( $($function:ident)::*( $($arg:expr,)*) ) => {{ let mut err: *mut ::libc::c_char = ::std::ptr::null_mut(); let result = $($function)::*($($arg,)* &mut err); if !err.is_null() { return Err(Error::new($crate::ffi_util::error_message(err))); } result }}; } /// Value which can be converted into a C string. /// /// The trait is used as argument to functions which wish to accept either /// [`&str`] or [`&CStr`](CStr) arguments while internally need to interact with /// C APIs. Accepting [`&str`] may be more convenient for users but requires /// conversion into [`CString`] internally which requires allocation. With this /// trait, latency-conscious users may choose to prepare [`CStr`] in advance and /// then pass it directly without having to incur the conversion cost. /// /// To use the trait, function should accept `impl CStrLike` and after baking /// the argument (with [`CStrLike::bake`] method) it can use it as a [`&CStr`](CStr) /// (since the baked result dereferences into [`CStr`]). /// /// # Example /// /// ``` /// use std::ffi::{CStr, CString}; /// use rocksdb::CStrLike; /// /// fn strlen(arg: impl CStrLike) -> std::result::Result { /// let baked = arg.bake().map_err(|err| err.to_string())?; /// Ok(unsafe { libc::strlen(baked.as_ptr()) }) /// } /// /// const FOO: &str = "foo"; /// const BAR: &CStr = unsafe { CStr::from_bytes_with_nul_unchecked(b"bar\0") }; /// /// assert_eq!(Ok(3), strlen(FOO)); /// assert_eq!(Ok(3), strlen(BAR)); /// ``` pub trait CStrLike { type Baked: std::ops::Deref; type Error: std::fmt::Debug + std::fmt::Display; /// Bakes self into value which can be freely converted into [`&CStr`](CStr). /// /// This may require allocation and may fail if `self` has invalid value. fn bake(self) -> Result; /// Consumers and converts value into an owned [`CString`]. /// /// If `Self` is already a `CString` simply returns it; if it’s a reference /// to a `CString` then the value is cloned. In other cases this may /// require allocation and may fail if `self` has invalid value. fn into_c_string(self) -> Result; } impl CStrLike for &str { type Baked = CString; type Error = std::ffi::NulError; fn bake(self) -> Result { CString::new(self) } fn into_c_string(self) -> Result { CString::new(self) } } // This is redundant for the most part and exists so that `foo(&string)` (where // `string: String` works just as if `foo` took `arg: &str` argument. impl CStrLike for &String { type Baked = CString; type Error = std::ffi::NulError; fn bake(self) -> Result { CString::new(self.as_bytes()) } fn into_c_string(self) -> Result { CString::new(self.as_bytes()) } } impl CStrLike for &CStr { type Baked = Self; type Error = std::convert::Infallible; fn bake(self) -> Result { Ok(self) } fn into_c_string(self) -> Result { Ok(self.to_owned()) } } // This exists so that if caller constructs a `CString` they can pass it into // the function accepting `CStrLike` argument. Some of such functions may take // the argument whereas otherwise they would need to allocated a new owned // object. impl CStrLike for CString { type Baked = CString; type Error = std::convert::Infallible; fn bake(self) -> Result { Ok(self) } fn into_c_string(self) -> Result { Ok(self) } } // This is redundant for the most part and exists so that `foo(&cstring)` (where // `string: CString` works just as if `foo` took `arg: &CStr` argument. impl<'a> CStrLike for &'a CString { type Baked = &'a CStr; type Error = std::convert::Infallible; fn bake(self) -> Result { Ok(self) } fn into_c_string(self) -> Result { Ok(self.clone()) } } /// Owned malloc-allocated memory slice. /// Do not derive `Clone` for this because it will cause double-free. pub struct CSlice { data: *const c_char, len: size_t, } impl CSlice { /// Constructing such a slice may be unsafe. /// /// # Safety /// The caller must ensure that the pointer and length are valid. /// Moreover, `CSlice` takes the ownership of the memory and will free it /// using `rocksdb_free`. The caller must ensure that the memory is /// allocated by `malloc` in RocksDB and will not be freed by any other /// means. pub(crate) unsafe fn from_raw_parts(data: *const c_char, len: size_t) -> Self { Self { data, len } } } impl AsRef<[u8]> for CSlice { fn as_ref(&self) -> &[u8] { unsafe { std::slice::from_raw_parts(self.data as *const u8, self.len) } } } impl Drop for CSlice { fn drop(&mut self) { unsafe { ffi::rocksdb_free(self.data as *mut c_void); } } } #[test] fn test_c_str_like_bake() { fn test(value: S) -> Result { value .bake() .map(|value| unsafe { libc::strlen(value.as_ptr()) }) } assert_eq!(Ok(3), test("foo")); // &str assert_eq!(Ok(3), test(&String::from("foo"))); // String assert_eq!(Ok(3), test(CString::new("foo").unwrap().as_ref())); // &CStr assert_eq!(Ok(3), test(&CString::new("foo").unwrap())); // &CString assert_eq!(Ok(3), test(CString::new("foo").unwrap())); // CString assert_eq!(3, test("foo\0bar").err().unwrap().nul_position()); } #[test] fn test_c_str_like_into() { fn test(value: S) -> Result { value.into_c_string() } let want = CString::new("foo").unwrap(); assert_eq!(Ok(want.clone()), test("foo")); // &str assert_eq!(Ok(want.clone()), test(&String::from("foo"))); // &String assert_eq!( Ok(want.clone()), test(CString::new("foo").unwrap().as_ref()) ); // &CStr assert_eq!(Ok(want.clone()), test(&CString::new("foo").unwrap())); // &CString assert_eq!(Ok(want), test(CString::new("foo").unwrap())); // CString assert_eq!(3, test("foo\0bar").err().unwrap().nul_position()); } rocksdb-0.23.0/src/iter_range.rs000064400000000000000000000070761046102023000146230ustar 00000000000000/// A range which can be set as iterate bounds on [`crate::ReadOptions`]. /// /// See [`crate::ReadOptions::set_iterate_range`] for documentation and /// examples. pub trait IterateBounds { /// Converts object into lower and upper bounds pair. /// /// If this object represents range with one of the bounds unset, /// corresponding element is returned as `None`. For example, `..upper` /// range would be converted into `(None, Some(upper))` pair. fn into_bounds(self) -> (Option>, Option>); } impl IterateBounds for std::ops::RangeFull { fn into_bounds(self) -> (Option>, Option>) { (None, None) } } impl>> IterateBounds for std::ops::Range { fn into_bounds(self) -> (Option>, Option>) { (Some(self.start.into()), Some(self.end.into())) } } impl>> IterateBounds for std::ops::RangeFrom { fn into_bounds(self) -> (Option>, Option>) { (Some(self.start.into()), None) } } impl>> IterateBounds for std::ops::RangeTo { fn into_bounds(self) -> (Option>, Option>) { (None, Some(self.end.into())) } } /// Representation of a range of keys starting with given prefix. /// /// Can be used as argument of [`crate::ReadOptions::set_iterate_range`] method /// to set iterate bounds. #[derive(Clone, Copy)] pub struct PrefixRange(pub K); impl>> IterateBounds for PrefixRange { /// Converts the prefix range representation into pair of bounds. /// /// The conversion assumes lexicographical sorting on `u8` values. For /// example, `PrefixRange("a")` is equivalent to `"a".."b"` range. Note /// that for some prefixes, either of the bounds may be `None`. For /// example, an empty prefix is equivalent to a full range (i.e. both bounds /// being `None`). fn into_bounds(self) -> (Option>, Option>) { let start = self.0.into(); if start.is_empty() { (None, None) } else { let end = next_prefix(&start); (Some(start), end) } } } /// Returns lowest value following largest value with given prefix. /// /// In other words, computes upper bound for a prefix scan over list of keys /// sorted in lexicographical order. This means that a prefix scan can be /// expressed as range scan over a right-open `[prefix, next_prefix(prefix))` /// range. /// /// For example, for prefix `foo` the function returns `fop`. /// /// Returns `None` if there is no value which can follow value with given /// prefix. This happens when prefix consists entirely of `'\xff'` bytes (or is /// empty). fn next_prefix(prefix: &[u8]) -> Option> { let ffs = prefix .iter() .rev() .take_while(|&&byte| byte == u8::MAX) .count(); let next = &prefix[..(prefix.len() - ffs)]; if next.is_empty() { // Prefix consisted of \xff bytes. There is no prefix that // follows it. None } else { let mut next = next.to_vec(); *next.last_mut().unwrap() += 1; Some(next) } } #[test] fn test_prefix_range() { fn test(start: &[u8], end: Option<&[u8]>) { let got = PrefixRange(start).into_bounds(); assert_eq!((Some(start), end), (got.0.as_deref(), got.1.as_deref())); } let empty: &[u8] = &[]; assert_eq!((None, None), PrefixRange(empty).into_bounds()); test(b"\xff", None); test(b"\xff\xff\xff\xff", None); test(b"a", Some(b"b")); test(b"a\xff\xff\xff", Some(b"b")); } rocksdb-0.23.0/src/lib.rs000064400000000000000000000242711046102023000132460ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // //! Rust wrapper for RocksDB. //! //! # Examples //! //! ``` //! use rocksdb::{DB, Options}; //! // NB: db is automatically closed at end of lifetime //! let tempdir = tempfile::Builder::new() //! .prefix("_path_for_rocksdb_storage") //! .tempdir() //! .expect("Failed to create temporary path for the _path_for_rocksdb_storage"); //! let path = tempdir.path(); //! { //! let db = DB::open_default(path).unwrap(); //! db.put(b"my key", b"my value").unwrap(); //! match db.get(b"my key") { //! Ok(Some(value)) => println!("retrieved value {}", String::from_utf8(value).unwrap()), //! Ok(None) => println!("value not found"), //! Err(e) => println!("operational problem encountered: {}", e), //! } //! db.delete(b"my key").unwrap(); //! } //! let _ = DB::destroy(&Options::default(), path); //! ``` //! //! Opening a database and a single column family with custom options: //! //! ``` //! use rocksdb::{DB, ColumnFamilyDescriptor, Options}; //! //! let tempdir = tempfile::Builder::new() //! .prefix("_path_for_rocksdb_storage_with_cfs") //! .tempdir() //! .expect("Failed to create temporary path for the _path_for_rocksdb_storage_with_cfs."); //! let path = tempdir.path(); //! let mut cf_opts = Options::default(); //! cf_opts.set_max_write_buffer_number(16); //! let cf = ColumnFamilyDescriptor::new("cf1", cf_opts); //! //! let mut db_opts = Options::default(); //! db_opts.create_missing_column_families(true); //! db_opts.create_if_missing(true); //! { //! let db = DB::open_cf_descriptors(&db_opts, path, vec![cf]).unwrap(); //! } //! let _ = DB::destroy(&db_opts, path); //! ``` //! #![warn(clippy::pedantic)] #![allow( // Next `cast_*` lints don't give alternatives. clippy::cast_possible_wrap, clippy::cast_possible_truncation, clippy::cast_sign_loss, // Next lints produce too much noise/false positives. clippy::module_name_repetitions, clippy::similar_names, clippy::must_use_candidate, // '... may panic' lints. // Too much work to fix. clippy::missing_errors_doc, // False positive: WebSocket clippy::doc_markdown, clippy::missing_safety_doc, clippy::needless_pass_by_value, clippy::ptr_as_ptr, clippy::missing_panics_doc, clippy::from_over_into, )] #[macro_use] mod ffi_util; pub mod backup; pub mod checkpoint; mod column_family; pub mod compaction_filter; pub mod compaction_filter_factory; mod comparator; mod db; mod db_iterator; mod db_options; mod db_pinnable_slice; mod env; mod iter_range; pub mod merge_operator; pub mod perf; mod prop_name; pub mod properties; mod slice_transform; mod snapshot; mod sst_file_writer; pub mod statistics; mod transactions; mod write_batch; pub use crate::{ column_family::{ AsColumnFamilyRef, BoundColumnFamily, ColumnFamily, ColumnFamilyDescriptor, ColumnFamilyRef, ColumnFamilyTtl, DEFAULT_COLUMN_FAMILY_NAME, }, compaction_filter::Decision as CompactionDecision, db::{ DBAccess, DBCommon, DBWithThreadMode, LiveFile, MultiThreaded, SingleThreaded, ThreadMode, DB, }, db_iterator::{ DBIterator, DBIteratorWithThreadMode, DBRawIterator, DBRawIteratorWithThreadMode, DBWALIterator, Direction, IteratorMode, }, db_options::{ BlockBasedIndexType, BlockBasedOptions, BottommostLevelCompaction, Cache, ChecksumType, CompactOptions, CompactionPri, CuckooTableOptions, DBCompactionStyle, DBCompressionType, DBPath, DBRecoveryMode, DataBlockIndexType, FifoCompactOptions, FlushOptions, IngestExternalFileOptions, KeyEncodingType, LogLevel, LruCacheOptions, MemtableFactory, Options, PlainTableFactoryOptions, ReadOptions, ReadTier, UniversalCompactOptions, UniversalCompactionStopStyle, WaitForCompactOptions, WriteBufferManager, WriteOptions, }, db_pinnable_slice::DBPinnableSlice, env::Env, ffi_util::CStrLike, iter_range::{IterateBounds, PrefixRange}, merge_operator::MergeOperands, perf::{PerfContext, PerfMetric, PerfStatsLevel}, slice_transform::SliceTransform, snapshot::{Snapshot, SnapshotWithThreadMode}, sst_file_writer::SstFileWriter, transactions::{ OptimisticTransactionDB, OptimisticTransactionOptions, Transaction, TransactionDB, TransactionDBOptions, TransactionOptions, }, write_batch::{WriteBatch, WriteBatchIterator, WriteBatchWithTransaction}, }; use librocksdb_sys as ffi; use std::error; use std::fmt; /// RocksDB error kind. #[derive(Debug, Clone, PartialEq, Eq)] pub enum ErrorKind { NotFound, Corruption, NotSupported, InvalidArgument, IOError, MergeInProgress, Incomplete, ShutdownInProgress, TimedOut, Aborted, Busy, Expired, TryAgain, CompactionTooLarge, ColumnFamilyDropped, Unknown, } /// A simple wrapper round a string, used for errors reported from /// ffi calls. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Error { message: String, } impl Error { fn new(message: String) -> Error { Error { message } } pub fn into_string(self) -> String { self.into() } /// Parse corresponding [`ErrorKind`] from error message. pub fn kind(&self) -> ErrorKind { match self.message.split(':').next().unwrap_or("") { "NotFound" => ErrorKind::NotFound, "Corruption" => ErrorKind::Corruption, "Not implemented" => ErrorKind::NotSupported, "Invalid argument" => ErrorKind::InvalidArgument, "IO error" => ErrorKind::IOError, "Merge in progress" => ErrorKind::MergeInProgress, "Result incomplete" => ErrorKind::Incomplete, "Shutdown in progress" => ErrorKind::ShutdownInProgress, "Operation timed out" => ErrorKind::TimedOut, "Operation aborted" => ErrorKind::Aborted, "Resource busy" => ErrorKind::Busy, "Operation expired" => ErrorKind::Expired, "Operation failed. Try again." => ErrorKind::TryAgain, "Compaction too large" => ErrorKind::CompactionTooLarge, "Column family dropped" => ErrorKind::ColumnFamilyDropped, _ => ErrorKind::Unknown, } } } impl AsRef for Error { fn as_ref(&self) -> &str { &self.message } } impl From for String { fn from(e: Error) -> String { e.message } } impl error::Error for Error { fn description(&self) -> &str { &self.message } } impl fmt::Display for Error { fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> { self.message.fmt(formatter) } } #[cfg(test)] mod test { use crate::{ OptimisticTransactionDB, OptimisticTransactionOptions, Transaction, TransactionDB, TransactionDBOptions, TransactionOptions, }; use super::{ column_family::UnboundColumnFamily, db_options::{CacheWrapper, WriteBufferManagerWrapper}, env::{Env, EnvWrapper}, BlockBasedOptions, BoundColumnFamily, Cache, ColumnFamily, ColumnFamilyDescriptor, DBIterator, DBRawIterator, IngestExternalFileOptions, Options, PlainTableFactoryOptions, ReadOptions, Snapshot, SstFileWriter, WriteBatch, WriteBufferManager, WriteOptions, DB, }; #[test] fn is_send() { // test (at compile time) that certain types implement the auto-trait Send, either directly for // pointer-wrapping types or transitively for types with all Send fields fn is_send() { // dummy function just used for its parameterized type bound } is_send::(); is_send::>(); is_send::>(); is_send::(); is_send::(); is_send::(); is_send::(); is_send::(); is_send::(); is_send::(); is_send::(); is_send::(); is_send::>(); is_send::(); is_send::(); is_send::(); is_send::(); is_send::(); is_send::(); is_send::(); is_send::(); is_send::(); is_send::>(); is_send::(); is_send::(); is_send::(); is_send::(); is_send::(); } #[test] fn is_sync() { // test (at compile time) that certain types implement the auto-trait Sync fn is_sync() { // dummy function just used for its parameterized type bound } is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); is_sync::(); } } rocksdb-0.23.0/src/merge_operator.rs000064400000000000000000000162571046102023000155170ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // //! rustic merge operator //! //! ``` //! use rocksdb::{Options, DB, MergeOperands}; //! //! fn concat_merge(new_key: &[u8], //! existing_val: Option<&[u8]>, //! operands: &MergeOperands) //! -> Option> { //! //! let mut result: Vec = Vec::with_capacity(operands.len()); //! existing_val.map(|v| { //! for e in v { //! result.push(*e) //! } //! }); //! for op in operands { //! for e in op { //! result.push(*e) //! } //! } //! Some(result) //! } //! //!let tempdir = tempfile::Builder::new() //! .prefix("_rust_path_to_rocksdb") //! .tempdir() //! .expect("Failed to create temporary path for the _rust_path_to_rocksdb"); //!let path = tempdir.path(); //!let mut opts = Options::default(); //! //!opts.create_if_missing(true); //!opts.set_merge_operator_associative("test operator", concat_merge); //!{ //! let db = DB::open(&opts, path).unwrap(); //! let p = db.put(b"k1", b"a"); //! db.merge(b"k1", b"b"); //! db.merge(b"k1", b"c"); //! db.merge(b"k1", b"d"); //! db.merge(b"k1", b"efg"); //! let r = db.get(b"k1"); //! assert_eq!(r.unwrap().unwrap(), b"abcdefg"); //!} //!let _ = DB::destroy(&opts, path); //! ``` use libc::{self, c_char, c_int, c_void, size_t}; use std::ffi::CString; use std::mem; use std::ptr; use std::slice; pub trait MergeFn: Fn(&[u8], Option<&[u8]>, &MergeOperands) -> Option> + Send + Sync + 'static { } impl MergeFn for F where F: Fn(&[u8], Option<&[u8]>, &MergeOperands) -> Option> + Send + Sync + 'static { } pub struct MergeOperatorCallback { pub name: CString, pub full_merge_fn: F, pub partial_merge_fn: PF, } pub unsafe extern "C" fn destructor_callback(raw_cb: *mut c_void) { drop(Box::from_raw(raw_cb as *mut MergeOperatorCallback)); } pub unsafe extern "C" fn delete_callback( _raw_cb: *mut c_void, value: *const c_char, value_length: size_t, ) { if !value.is_null() { drop(Box::from_raw(slice::from_raw_parts_mut( value as *mut u8, value_length, ))); } } pub unsafe extern "C" fn name_callback( raw_cb: *mut c_void, ) -> *const c_char { let cb = &mut *(raw_cb as *mut MergeOperatorCallback); cb.name.as_ptr() } pub unsafe extern "C" fn full_merge_callback( raw_cb: *mut c_void, raw_key: *const c_char, key_len: size_t, existing_value: *const c_char, existing_value_len: size_t, operands_list: *const *const c_char, operands_list_len: *const size_t, num_operands: c_int, success: *mut u8, new_value_length: *mut size_t, ) -> *mut c_char { let cb = &mut *(raw_cb as *mut MergeOperatorCallback); let operands = &MergeOperands::new(operands_list, operands_list_len, num_operands); let key = slice::from_raw_parts(raw_key as *const u8, key_len); let oldval = if existing_value.is_null() { None } else { Some(slice::from_raw_parts( existing_value as *const u8, existing_value_len, )) }; (cb.full_merge_fn)(key, oldval, operands).map_or_else( || { *new_value_length = 0; *success = 0_u8; ptr::null_mut() as *mut c_char }, |result| { *new_value_length = result.len() as size_t; *success = 1_u8; Box::into_raw(result.into_boxed_slice()) as *mut c_char }, ) } pub unsafe extern "C" fn partial_merge_callback( raw_cb: *mut c_void, raw_key: *const c_char, key_len: size_t, operands_list: *const *const c_char, operands_list_len: *const size_t, num_operands: c_int, success: *mut u8, new_value_length: *mut size_t, ) -> *mut c_char { let cb = &mut *(raw_cb as *mut MergeOperatorCallback); let operands = &MergeOperands::new(operands_list, operands_list_len, num_operands); let key = slice::from_raw_parts(raw_key as *const u8, key_len); (cb.partial_merge_fn)(key, None, operands).map_or_else( || { *new_value_length = 0; *success = 0_u8; ptr::null_mut::() }, |result| { *new_value_length = result.len() as size_t; *success = 1_u8; Box::into_raw(result.into_boxed_slice()) as *mut c_char }, ) } pub struct MergeOperands { operands_list: *const *const c_char, operands_list_len: *const size_t, num_operands: usize, } impl MergeOperands { fn new( operands_list: *const *const c_char, operands_list_len: *const size_t, num_operands: c_int, ) -> MergeOperands { assert!(num_operands >= 0); MergeOperands { operands_list, operands_list_len, num_operands: num_operands as usize, } } pub fn len(&self) -> usize { self.num_operands } pub fn is_empty(&self) -> bool { self.num_operands == 0 } pub fn iter(&self) -> MergeOperandsIter { MergeOperandsIter { operands: self, cursor: 0, } } fn get_operand(&self, index: usize) -> Option<&[u8]> { if index >= self.num_operands { None } else { unsafe { let base = self.operands_list as usize; let base_len = self.operands_list_len as usize; let spacing = mem::size_of::<*const *const u8>(); let spacing_len = mem::size_of::<*const size_t>(); let len_ptr = (base_len + (spacing_len * index)) as *const size_t; let len = *len_ptr; let ptr = base + (spacing * index); Some(slice::from_raw_parts(*(ptr as *const *const u8), len)) } } } } pub struct MergeOperandsIter<'a> { operands: &'a MergeOperands, cursor: usize, } impl<'a> Iterator for MergeOperandsIter<'a> { type Item = &'a [u8]; fn next(&mut self) -> Option { let operand = self.operands.get_operand(self.cursor)?; self.cursor += 1; Some(operand) } fn size_hint(&self) -> (usize, Option) { let remaining = self.operands.num_operands - self.cursor; (remaining, Some(remaining)) } } impl<'a> IntoIterator for &'a MergeOperands { type Item = &'a [u8]; type IntoIter = MergeOperandsIter<'a>; fn into_iter(self) -> Self::IntoIter { Self::IntoIter { operands: self, cursor: 0, } } } rocksdb-0.23.0/src/perf.rs000064400000000000000000000214731046102023000134350ustar 00000000000000// Copyright 2020 Tran Tuan Linh // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. use libc::{c_int, c_uchar, c_void}; use crate::{db::DBInner, ffi, ffi_util::from_cstr, Cache, Error}; use crate::{DBCommon, ThreadMode, TransactionDB, DB}; #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[repr(i32)] pub enum PerfStatsLevel { /// Unknown settings Uninitialized = 0, /// Disable perf stats Disable, /// Enables only count stats EnableCount, /// Count stats and enable time stats except for mutexes EnableTimeExceptForMutex, /// Other than time, also measure CPU time counters. Still don't measure /// time (neither wall time nor CPU time) for mutexes EnableTimeAndCPUTimeExceptForMutex, /// Enables count and time stats EnableTime, /// N.B must always be the last value! OutOfBound, } #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] #[non_exhaustive] #[repr(i32)] pub enum PerfMetric { UserKeyComparisonCount = 0, BlockCacheHitCount = 1, BlockReadCount = 2, BlockReadByte = 3, BlockReadTime = 4, BlockChecksumTime = 5, BlockDecompressTime = 6, GetReadBytes = 7, MultigetReadBytes = 8, IterReadBytes = 9, InternalKeySkippedCount = 10, InternalDeleteSkippedCount = 11, InternalRecentSkippedCount = 12, InternalMergeCount = 13, GetSnapshotTime = 14, GetFromMemtableTime = 15, GetFromMemtableCount = 16, GetPostProcessTime = 17, GetFromOutputFilesTime = 18, SeekOnMemtableTime = 19, SeekOnMemtableCount = 20, NextOnMemtableCount = 21, PrevOnMemtableCount = 22, SeekChildSeekTime = 23, SeekChildSeekCount = 24, SeekMinHeapTime = 25, SeekMaxHeapTime = 26, SeekInternalSeekTime = 27, FindNextUserEntryTime = 28, WriteWalTime = 29, WriteMemtableTime = 30, WriteDelayTime = 31, WritePreAndPostProcessTime = 32, DbMutexLockNanos = 33, DbConditionWaitNanos = 34, MergeOperatorTimeNanos = 35, ReadIndexBlockNanos = 36, ReadFilterBlockNanos = 37, NewTableBlockIterNanos = 38, NewTableIteratorNanos = 39, BlockSeekNanos = 40, FindTableNanos = 41, BloomMemtableHitCount = 42, BloomMemtableMissCount = 43, BloomSstHitCount = 44, BloomSstMissCount = 45, KeyLockWaitTime = 46, KeyLockWaitCount = 47, EnvNewSequentialFileNanos = 48, EnvNewRandomAccessFileNanos = 49, EnvNewWritableFileNanos = 50, EnvReuseWritableFileNanos = 51, EnvNewRandomRwFileNanos = 52, EnvNewDirectoryNanos = 53, EnvFileExistsNanos = 54, EnvGetChildrenNanos = 55, EnvGetChildrenFileAttributesNanos = 56, EnvDeleteFileNanos = 57, EnvCreateDirNanos = 58, EnvCreateDirIfMissingNanos = 59, EnvDeleteDirNanos = 60, EnvGetFileSizeNanos = 61, EnvGetFileModificationTimeNanos = 62, EnvRenameFileNanos = 63, EnvLinkFileNanos = 64, EnvLockFileNanos = 65, EnvUnlockFileNanos = 66, EnvNewLoggerNanos = 67, TotalMetricCount = 68, } /// Sets the perf stats level for current thread. pub fn set_perf_stats(lvl: PerfStatsLevel) { unsafe { ffi::rocksdb_set_perf_level(lvl as c_int); } } /// Thread local context for gathering performance counter efficiently /// and transparently. pub struct PerfContext { pub(crate) inner: *mut ffi::rocksdb_perfcontext_t, } impl Default for PerfContext { fn default() -> Self { let ctx = unsafe { ffi::rocksdb_perfcontext_create() }; assert!(!ctx.is_null(), "Could not create Perf Context"); Self { inner: ctx } } } impl Drop for PerfContext { fn drop(&mut self) { unsafe { ffi::rocksdb_perfcontext_destroy(self.inner); } } } impl PerfContext { /// Reset context pub fn reset(&mut self) { unsafe { ffi::rocksdb_perfcontext_reset(self.inner); } } /// Get the report on perf pub fn report(&self, exclude_zero_counters: bool) -> String { unsafe { let ptr = ffi::rocksdb_perfcontext_report(self.inner, c_uchar::from(exclude_zero_counters)); let report = from_cstr(ptr); ffi::rocksdb_free(ptr as *mut c_void); report } } /// Returns value of a metric pub fn metric(&self, id: PerfMetric) -> u64 { unsafe { ffi::rocksdb_perfcontext_metric(self.inner, id as c_int) } } } /// Memory usage stats pub struct MemoryUsageStats { /// Approximate memory usage of all the mem-tables pub mem_table_total: u64, /// Approximate memory usage of un-flushed mem-tables pub mem_table_unflushed: u64, /// Approximate memory usage of all the table readers pub mem_table_readers_total: u64, /// Approximate memory usage by cache pub cache_total: u64, } /// Wrap over memory_usage_t. Hold current memory usage of the specified DB instances and caches pub struct MemoryUsage { inner: *mut ffi::rocksdb_memory_usage_t, } impl Drop for MemoryUsage { fn drop(&mut self) { unsafe { ffi::rocksdb_approximate_memory_usage_destroy(self.inner); } } } impl MemoryUsage { /// Approximate memory usage of all the mem-tables pub fn approximate_mem_table_total(&self) -> u64 { unsafe { ffi::rocksdb_approximate_memory_usage_get_mem_table_total(self.inner) } } /// Approximate memory usage of un-flushed mem-tables pub fn approximate_mem_table_unflushed(&self) -> u64 { unsafe { ffi::rocksdb_approximate_memory_usage_get_mem_table_unflushed(self.inner) } } /// Approximate memory usage of all the table readers pub fn approximate_mem_table_readers_total(&self) -> u64 { unsafe { ffi::rocksdb_approximate_memory_usage_get_mem_table_readers_total(self.inner) } } /// Approximate memory usage by cache pub fn approximate_cache_total(&self) -> u64 { unsafe { ffi::rocksdb_approximate_memory_usage_get_cache_total(self.inner) } } } /// Builder for MemoryUsage pub struct MemoryUsageBuilder { inner: *mut ffi::rocksdb_memory_consumers_t, } impl Drop for MemoryUsageBuilder { fn drop(&mut self) { unsafe { ffi::rocksdb_memory_consumers_destroy(self.inner); } } } impl MemoryUsageBuilder { /// Create new instance pub fn new() -> Result { let mc = unsafe { ffi::rocksdb_memory_consumers_create() }; if mc.is_null() { Err(Error::new( "Could not create MemoryUsage builder".to_owned(), )) } else { Ok(Self { inner: mc }) } } /// Add a DB instance to collect memory usage from it and add up in total stats pub fn add_tx_db(&mut self, db: &TransactionDB) { unsafe { let base = ffi::rocksdb_transactiondb_get_base_db(db.inner); ffi::rocksdb_memory_consumers_add_db(self.inner, base); } } /// Add a DB instance to collect memory usage from it and add up in total stats pub fn add_db(&mut self, db: &DBCommon) { unsafe { ffi::rocksdb_memory_consumers_add_db(self.inner, db.inner.inner()); } } /// Add a cache to collect memory usage from it and add up in total stats pub fn add_cache(&mut self, cache: &Cache) { unsafe { ffi::rocksdb_memory_consumers_add_cache(self.inner, cache.0.inner.as_ptr()); } } /// Build up MemoryUsage pub fn build(&self) -> Result { unsafe { let mu = ffi_try!(ffi::rocksdb_approximate_memory_usage_create(self.inner)); Ok(MemoryUsage { inner: mu }) } } } /// Get memory usage stats from DB instances and Cache instances pub fn get_memory_usage_stats( dbs: Option<&[&DB]>, caches: Option<&[&Cache]>, ) -> Result { let mut builder = MemoryUsageBuilder::new()?; if let Some(dbs_) = dbs { dbs_.iter().for_each(|db| builder.add_db(db)); } if let Some(caches_) = caches { caches_.iter().for_each(|cache| builder.add_cache(cache)); } let mu = builder.build()?; Ok(MemoryUsageStats { mem_table_total: mu.approximate_mem_table_total(), mem_table_unflushed: mu.approximate_mem_table_unflushed(), mem_table_readers_total: mu.approximate_mem_table_readers_total(), cache_total: mu.approximate_cache_total(), }) } rocksdb-0.23.0/src/prop_name.rs000064400000000000000000000204061046102023000144540ustar 00000000000000use crate::ffi_util::CStrLike; use std::ffi::{CStr, CString}; /// A borrowed name of a RocksDB property. /// /// The value is guaranteed to be a nul-terminated UTF-8 string. This means it /// can be converted to [`CStr`] and [`str`] at zero cost. #[derive(PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(transparent)] pub struct PropName(CStr); impl PropName { /// Creates a new object from a nul-terminated string with no internal nul /// bytes. /// /// Panics if the `value` isn’t terminated by a nul byte or contains /// interior nul bytes. pub(crate) const fn new_unwrap(value: &str) -> &Self { let Some((&0, bytes)) = value.as_bytes().split_last() else { panic!("input was not nul-terminated"); }; let mut idx = 0; while idx < bytes.len() { assert!(bytes[idx] != 0, "input contained interior nul byte"); idx += 1; } // SAFETY: 1. We’ve just verified `value` is a nul-terminated with no // interior nul bytes and since its `str` it’s also valid UTF-8. // 2. Self and CStr have the same representation so casting is sound. unsafe { let value = CStr::from_bytes_with_nul_unchecked(value.as_bytes()); &*(value as *const CStr as *const Self) } } /// Converts the value into a C string slice. #[inline] pub fn as_c_str(&self) -> &CStr { &self.0 } /// Converts the value into a string slice. /// /// Nul byte terminating the underlying C string is not included in the /// returned slice. #[inline] pub fn as_str(&self) -> &str { // SAFETY: self.0 is guaranteed to be valid ASCII string. unsafe { std::str::from_utf8_unchecked(self.0.to_bytes()) } } } impl core::ops::Deref for PropName { type Target = CStr; #[inline] fn deref(&self) -> &Self::Target { self.as_c_str() } } impl core::convert::AsRef for PropName { #[inline] fn as_ref(&self) -> &CStr { self.as_c_str() } } impl core::convert::AsRef for PropName { #[inline] fn as_ref(&self) -> &str { self.as_str() } } impl std::borrow::ToOwned for PropName { type Owned = PropertyName; #[inline] fn to_owned(&self) -> Self::Owned { PropertyName(self.0.to_owned()) } #[inline] fn clone_into(&self, target: &mut Self::Owned) { self.0.clone_into(&mut target.0); } } impl core::fmt::Display for PropName { #[inline] fn fmt(&self, fmtr: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { self.as_str().fmt(fmtr) } } impl core::fmt::Debug for PropName { #[inline] fn fmt(&self, fmtr: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { self.as_str().fmt(fmtr) } } impl core::cmp::PartialEq for PropName { #[inline] fn eq(&self, other: &CStr) -> bool { self.as_c_str().eq(other) } } impl core::cmp::PartialEq for PropName { #[inline] fn eq(&self, other: &str) -> bool { self.as_str().eq(other) } } impl core::cmp::PartialEq for CStr { #[inline] fn eq(&self, other: &PropName) -> bool { self.eq(other.as_c_str()) } } impl core::cmp::PartialEq for str { #[inline] fn eq(&self, other: &PropName) -> bool { self.eq(other.as_str()) } } impl<'a> CStrLike for &'a PropName { type Baked = &'a CStr; type Error = std::convert::Infallible; #[inline] fn bake(self) -> Result { Ok(&self.0) } #[inline] fn into_c_string(self) -> Result { Ok(self.0.to_owned()) } } /// An owned name of a RocksDB property. /// /// The value is guaranteed to be a nul-terminated UTF-8 string. This means it /// can be converted to [`CString`] and [`String`] at zero cost. #[derive(PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(transparent)] pub struct PropertyName(CString); impl PropertyName { /// Creates a new object from valid nul-terminated UTF-8 string. The string /// must not contain interior nul bytes. #[inline] unsafe fn from_vec_with_nul_unchecked(inner: Vec) -> Self { // SAFETY: Caller promises inner is nul-terminated and valid UTF-8. Self(CString::from_vec_with_nul_unchecked(inner)) } /// Converts the value into a C string. #[inline] pub fn into_c_string(self) -> CString { self.0 } /// Converts the property name into a string. /// /// Nul byte terminating the underlying C string is not included in the /// returned value. #[inline] pub fn into_string(self) -> String { // SAFETY: self.0 is guaranteed to be valid UTF-8. unsafe { String::from_utf8_unchecked(self.0.into_bytes()) } } } impl std::ops::Deref for PropertyName { type Target = PropName; #[inline] fn deref(&self) -> &Self::Target { // SAFETY: 1. PropName and CStr have the same representation so casting // is safe. 2. self.0 is guaranteed to be valid nul-terminated UTF-8 // string. unsafe { &*(self.0.as_c_str() as *const CStr as *const PropName) } } } impl core::convert::AsRef for PropertyName { #[inline] fn as_ref(&self) -> &CStr { self.as_c_str() } } impl core::convert::AsRef for PropertyName { #[inline] fn as_ref(&self) -> &str { self.as_str() } } impl std::borrow::Borrow for PropertyName { #[inline] fn borrow(&self) -> &PropName { self } } impl core::fmt::Display for PropertyName { #[inline] fn fmt(&self, fmtr: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { self.as_str().fmt(fmtr) } } impl core::fmt::Debug for PropertyName { #[inline] fn fmt(&self, fmtr: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { self.as_str().fmt(fmtr) } } impl core::cmp::PartialEq for PropertyName { #[inline] fn eq(&self, other: &CString) -> bool { self.as_c_str().eq(other.as_c_str()) } } impl core::cmp::PartialEq for PropertyName { #[inline] fn eq(&self, other: &String) -> bool { self.as_str().eq(other.as_str()) } } impl core::cmp::PartialEq for CString { #[inline] fn eq(&self, other: &PropertyName) -> bool { self.as_c_str().eq(other.as_c_str()) } } impl core::cmp::PartialEq for String { #[inline] fn eq(&self, other: &PropertyName) -> bool { self.as_str().eq(other.as_str()) } } impl CStrLike for PropertyName { type Baked = CString; type Error = std::convert::Infallible; #[inline] fn bake(self) -> Result { Ok(self.0) } #[inline] fn into_c_string(self) -> Result { Ok(self.0) } } impl<'a> CStrLike for &'a PropertyName { type Baked = &'a CStr; type Error = std::convert::Infallible; #[inline] fn bake(self) -> Result { Ok(self.as_c_str()) } #[inline] fn into_c_string(self) -> Result { Ok(self.0.clone()) } } /// Constructs a property name for an ‘at level’ property. /// /// `name` is the infix of the property name (e.g. `"num-files-at-level"`) and /// `level` is level to get statistics of. The property name is constructed as /// `"rocksdb."`. /// /// Expects `name` not to contain any interior nul bytes. pub(crate) unsafe fn level_property(name: &str, level: usize) -> PropertyName { let bytes = format!("rocksdb.{name}{level}\0").into_bytes(); // SAFETY: We’re appending terminating nul and caller promises `name` has no // interior nul bytes. PropertyName::from_vec_with_nul_unchecked(bytes) } #[test] fn sanity_checks() { let want = "rocksdb.cfstats-no-file-histogram"; assert_eq!(want, crate::properties::CFSTATS_NO_FILE_HISTOGRAM); let want = "rocksdb.num-files-at-level5"; assert_eq!(want, &*crate::properties::num_files_at_level(5)); } #[test] #[should_panic(expected = "input contained interior nul byte")] fn test_interior_nul() { PropName::new_unwrap("interior nul\0\0"); } #[test] #[should_panic(expected = "input was not nul-terminated")] fn test_non_nul_terminated() { PropName::new_unwrap("no nul terminator"); } rocksdb-0.23.0/src/properties.rs000064400000000000000000000254371046102023000147010ustar 00000000000000//! Properties //! //! Full list of valid properties and descriptions pulled from //! [here](https:///github.com/facebook/rocksdb/blob/08809f5e6cd9cc4bc3958dd4d59457ae78c76660/include/rocksdb/db.h#L428-L634). use crate::prop_name::level_property; pub use crate::prop_name::{PropName, PropertyName}; macro_rules! property { ($suffix: literal) => { PropName::new_unwrap(concat!("rocksdb.", $suffix, "\0")) }; } /// "rocksdb.num-files-at-level<`N`>" - returns string containing the number /// of files at level <`N`>, where <`N`> is an ASCII representation of a /// level number (e.g., "0"). pub fn num_files_at_level(level: usize) -> PropertyName { unsafe { level_property("num-files-at-level", level) } } /// "rocksdb.compression-ratio-at-level<`N`>" - returns string containing the /// compression ratio of data at level <`N`>, where <`N`> is an ASCII /// representation of a level number (e.g., "0"). Here, compression /// ratio is defined as uncompressed data size / compressed file size. /// Returns "-1.0" if no open files at level <`N`>. pub fn compression_ratio_at_level(level: usize) -> PropertyName { unsafe { level_property("compression-ratio-at-level", level) } } /// "rocksdb.stats" - returns a multi-line string containing the data /// described by kCFStats followed by the data described by kDBStats. pub const STATS: &PropName = property!("stats"); /// "rocksdb.sstables" - returns a multi-line string summarizing current /// SST files. pub const SSTABLES: &PropName = property!("sstables"); /// "rocksdb.cfstats" - Both of "rocksdb.cfstats-no-file-histogram" and /// "rocksdb.cf-file-histogram" together. See below for description /// of the two. pub const CFSTATS: &PropName = property!("CFSTATS"); /// "rocksdb.cfstats-no-file-histogram" - returns a multi-line string with /// general column family stats per-level over db's lifetime ("`L`"), /// aggregated over db's lifetime ("Sum"), and aggregated over the /// interval since the last retrieval ("Int"). /// It could also be used to return the stats in the format of the map. /// In this case there will be a pair of string to array of double for /// each level as well as for "Sum". "Int" stats will not be affected /// when this form of stats are retrieved. pub const CFSTATS_NO_FILE_HISTOGRAM: &PropName = property!("cfstats-no-file-histogram"); /// "rocksdb.cf-file-histogram" - print out how many file reads to every /// level, as well as the histogram of latency of single requests. pub const CF_FILE_HISTOGRAM: &PropName = property!("cf-file-histogram"); /// "rocksdb.dbstats" - returns a multi-line string with general database /// stats, both cumulative (over the db's lifetime) and interval (since /// the last retrieval of kDBStats). pub const DBSTATS: &PropName = property!("dbstats"); /// "rocksdb.levelstats" - returns multi-line string containing the number /// of files per level and total size of each level (MB). pub const LEVELSTATS: &PropName = property!("levelstats"); /// "rocksdb.num-immutable-mem-table" - returns number of immutable /// memtables that have not yet been flushed. pub const NUM_IMMUTABLE_MEM_TABLE: &PropName = property!("num-immutable-mem-table"); /// "rocksdb.num-immutable-mem-table-flushed" - returns number of immutable /// memtables that have already been flushed. pub const NUM_IMMUTABLE_MEM_TABLE_FLUSHED: &PropName = property!("num-immutable-mem-table-flushed"); /// "rocksdb.mem-table-flush-pending" - returns 1 if a memtable flush is /// pending; otherwise, returns 0. pub const MEM_TABLE_FLUSH_PENDING: &PropName = property!("mem-table-flush-pending"); /// "rocksdb.num-running-flushes" - returns the number of currently running /// flushes. pub const NUM_RUNNING_FLUSHES: &PropName = property!("num-running-flushes"); /// "rocksdb.compaction-pending" - returns 1 if at least one compaction is /// pending; otherwise, returns 0. pub const COMPACTION_PENDING: &PropName = property!("compaction-pending"); /// "rocksdb.num-running-compactions" - returns the number of currently /// running compactions. pub const NUM_RUNNING_COMPACTIONS: &PropName = property!("num-running-compactions"); /// "rocksdb.background-errors" - returns accumulated number of background /// errors. pub const BACKGROUND_ERRORS: &PropName = property!("background-errors"); /// "rocksdb.cur-size-active-mem-table" - returns approximate size of active /// memtable (bytes). pub const CUR_SIZE_ACTIVE_MEM_TABLE: &PropName = property!("cur-size-active-mem-table"); /// "rocksdb.cur-size-all-mem-tables" - returns approximate size of active /// and unflushed immutable memtables (bytes). pub const CUR_SIZE_ALL_MEM_TABLES: &PropName = property!("cur-size-all-mem-tables"); /// "rocksdb.size-all-mem-tables" - returns approximate size of active, /// unflushed immutable, and pinned immutable memtables (bytes). pub const SIZE_ALL_MEM_TABLES: &PropName = property!("size-all-mem-tables"); /// "rocksdb.num-entries-active-mem-table" - returns total number of entries /// in the active memtable. pub const NUM_ENTRIES_ACTIVE_MEM_TABLE: &PropName = property!("num-entries-active-mem-table"); /// "rocksdb.num-entries-imm-mem-tables" - returns total number of entries /// in the unflushed immutable memtables. pub const NUM_ENTRIES_IMM_MEM_TABLES: &PropName = property!("num-entries-imm-mem-tables"); /// "rocksdb.num-deletes-active-mem-table" - returns total number of delete /// entries in the active memtable. pub const NUM_DELETES_ACTIVE_MEM_TABLE: &PropName = property!("num-deletes-active-mem-table"); /// "rocksdb.num-deletes-imm-mem-tables" - returns total number of delete /// entries in the unflushed immutable memtables. pub const NUM_DELETES_IMM_MEM_TABLES: &PropName = property!("num-deletes-imm-mem-tables"); /// "rocksdb.estimate-num-keys" - returns estimated number of total keys in /// the active and unflushed immutable memtables and storage. pub const ESTIMATE_NUM_KEYS: &PropName = property!("estimate-num-keys"); /// "rocksdb.estimate-table-readers-mem" - returns estimated memory used for /// reading SST tables, excluding memory used in block cache (e.g., /// filter and index blocks). pub const ESTIMATE_TABLE_READERS_MEM: &PropName = property!("estimate-table-readers-mem"); /// "rocksdb.is-file-deletions-enabled" - returns 0 if deletion of obsolete /// files is enabled; otherwise, returns a non-zero number. pub const IS_FILE_DELETIONS_ENABLED: &PropName = property!("is-file-deletions-enabled"); /// "rocksdb.num-snapshots" - returns number of unreleased snapshots of the /// database. pub const NUM_SNAPSHOTS: &PropName = property!("num-snapshots"); /// "rocksdb.oldest-snapshot-time" - returns number representing unix /// timestamp of oldest unreleased snapshot. pub const OLDEST_SNAPSHOT_TIME: &PropName = property!("oldest-snapshot-time"); /// "rocksdb.num-live-versions" - returns number of live versions. `Version` /// is an internal data structure. See version_set.h for details. More /// live versions often mean more SST files are held from being deleted, /// by iterators or unfinished compactions. pub const NUM_LIVE_VERSIONS: &PropName = property!("num-live-versions"); /// "rocksdb.current-super-version-number" - returns number of current LSM /// version. It is a uint64_t integer number, incremented after there is /// any change to the LSM tree. The number is not preserved after restarting /// the DB. After DB restart, it will start from 0 again. pub const CURRENT_SUPER_VERSION_NUMBER: &PropName = property!("current-super-version-number"); /// "rocksdb.estimate-live-data-size" - returns an estimate of the amount of /// live data in bytes. pub const ESTIMATE_LIVE_DATA_SIZE: &PropName = property!("estimate-live-data-size"); /// "rocksdb.min-log-number-to-keep" - return the minimum log number of the /// log files that should be kept. pub const MIN_LOG_NUMBER_TO_KEEP: &PropName = property!("min-log-number-to-keep"); /// "rocksdb.min-obsolete-sst-number-to-keep" - return the minimum file /// number for an obsolete SST to be kept. The max value of `uint64_t` /// will be returned if all obsolete files can be deleted. pub const MIN_OBSOLETE_SST_NUMBER_TO_KEEP: &PropName = property!("min-obsolete-sst-number-to-keep"); /// "rocksdb.total-sst-files-size" - returns total size (bytes) of all SST /// files. /// WARNING: may slow down online queries if there are too many files. pub const TOTAL_SST_FILES_SIZE: &PropName = property!("total-sst-files-size"); /// "rocksdb.live-sst-files-size" - returns total size (bytes) of all SST /// files belong to the latest LSM tree. pub const LIVE_SST_FILES_SIZE: &PropName = property!("live-sst-files-size"); /// "rocksdb.base-level" - returns number of level to which L0 data will be /// compacted. pub const BASE_LEVEL: &PropName = property!("base-level"); /// "rocksdb.estimate-pending-compaction-bytes" - returns estimated total /// number of bytes compaction needs to rewrite to get all levels down /// to under target size. Not valid for other compactions than level- /// based. pub const ESTIMATE_PENDING_COMPACTION_BYTES: &PropName = property!("estimate-pending-compaction-bytes"); /// "rocksdb.aggregated-table-properties" - returns a string representation /// of the aggregated table properties of the target column family. pub const AGGREGATED_TABLE_PROPERTIES: &PropName = property!("aggregated-table-properties"); /// "rocksdb.aggregated-table-properties-at-`level`", same as the previous /// one but only returns the aggregated table properties of the /// specified level "N" at the target column family. pub fn aggregated_table_properties_at_level(level: usize) -> PropertyName { unsafe { level_property("aggregated-table-properties-at-level", level) } } /// "rocksdb.actual-delayed-write-rate" - returns the current actual delayed /// write rate. 0 means no delay. pub const ACTUAL_DELAYED_WRITE_RATE: &PropName = property!("actual-delayed-write-rate"); /// "rocksdb.is-write-stopped" - Return 1 if write has been stopped. pub const IS_WRITE_STOPPED: &PropName = property!("is-write-stopped"); /// "rocksdb.estimate-oldest-key-time" - returns an estimation of /// oldest key timestamp in the DB. Currently only available for /// FIFO compaction with /// compaction_options_fifo.allow_compaction = false. pub const ESTIMATE_OLDEST_KEY_TIME: &PropName = property!("estimate-oldest-key-time"); /// "rocksdb.block-cache-capacity" - returns block cache capacity. pub const BLOCK_CACHE_CAPACITY: &PropName = property!("block-cache-capacity"); /// "rocksdb.block-cache-usage" - returns the memory size for the entries /// residing in block cache. pub const BLOCK_CACHE_USAGE: &PropName = property!("block-cache-usage"); /// "rocksdb.block-cache-pinned-usage" - returns the memory size for the /// entries being pinned. pub const BLOCK_CACHE_PINNED_USAGE: &PropName = property!("block-cache-pinned-usage"); /// "rocksdb.options-statistics" - returns multi-line string /// of options.statistics pub const OPTIONS_STATISTICS: &PropName = property!("options-statistics"); rocksdb-0.23.0/src/slice_transform.rs000064400000000000000000000072031046102023000156660ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. use std::ffi::CString; use std::slice; use libc::{c_char, c_uchar, c_void, size_t}; use crate::{ffi, ffi_util::CStrLike}; /// A `SliceTransform` is a generic pluggable way of transforming one string /// to another. Its primary use-case is in configuring rocksdb /// to store prefix blooms by setting prefix_extractor in /// ColumnFamilyOptions. pub struct SliceTransform { pub inner: *mut ffi::rocksdb_slicetransform_t, } // NB we intentionally don't implement a Drop that passes // through to rocksdb_slicetransform_destroy because // this is currently only used (to my knowledge) // by people passing it as a prefix extractor when // opening a DB. impl SliceTransform { pub fn create( name: impl CStrLike, transform_fn: TransformFn, in_domain_fn: Option, ) -> SliceTransform { let cb = Box::into_raw(Box::new(TransformCallback { name: name.into_c_string().unwrap(), transform_fn, in_domain_fn, })); let st = unsafe { ffi::rocksdb_slicetransform_create( cb as *mut c_void, Some(slice_transform_destructor_callback), Some(transform_callback), Some(in_domain_callback), // this None points to the deprecated InRange callback None, Some(slice_transform_name_callback), ) }; SliceTransform { inner: st } } pub fn create_fixed_prefix(len: size_t) -> SliceTransform { SliceTransform { inner: unsafe { ffi::rocksdb_slicetransform_create_fixed_prefix(len) }, } } pub fn create_noop() -> SliceTransform { SliceTransform { inner: unsafe { ffi::rocksdb_slicetransform_create_noop() }, } } } pub type TransformFn<'a> = fn(&'a [u8]) -> &'a [u8]; pub type InDomainFn = fn(&[u8]) -> bool; pub struct TransformCallback<'a> { pub name: CString, pub transform_fn: TransformFn<'a>, pub in_domain_fn: Option, } pub unsafe extern "C" fn slice_transform_destructor_callback(raw_cb: *mut c_void) { drop(Box::from_raw(raw_cb as *mut TransformCallback)); } pub unsafe extern "C" fn slice_transform_name_callback(raw_cb: *mut c_void) -> *const c_char { let cb = &mut *(raw_cb as *mut TransformCallback); cb.name.as_ptr() } pub unsafe extern "C" fn transform_callback( raw_cb: *mut c_void, raw_key: *const c_char, key_len: size_t, dst_length: *mut size_t, ) -> *mut c_char { let cb = &mut *(raw_cb as *mut TransformCallback); let key = slice::from_raw_parts(raw_key as *const u8, key_len); let prefix = (cb.transform_fn)(key); *dst_length = prefix.len() as size_t; prefix.as_ptr() as *mut c_char } pub unsafe extern "C" fn in_domain_callback( raw_cb: *mut c_void, raw_key: *const c_char, key_len: size_t, ) -> c_uchar { let cb = &mut *(raw_cb as *mut TransformCallback); let key = slice::from_raw_parts(raw_key as *const u8, key_len); c_uchar::from(cb.in_domain_fn.map_or(true, |in_domain| in_domain(key))) } rocksdb-0.23.0/src/snapshot.rs000064400000000000000000000234111046102023000143320ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. use crate::{ db::DBAccess, ffi, AsColumnFamilyRef, DBIteratorWithThreadMode, DBPinnableSlice, DBRawIteratorWithThreadMode, Error, IteratorMode, ReadOptions, DB, }; /// A type alias to keep compatibility. See [`SnapshotWithThreadMode`] for details pub type Snapshot<'a> = SnapshotWithThreadMode<'a, DB>; /// A consistent view of the database at the point of creation. /// /// # Examples /// /// ``` /// use rocksdb::{DB, IteratorMode, Options}; /// /// let tempdir = tempfile::Builder::new() /// .prefix("_path_for_rocksdb_storage3") /// .tempdir() /// .expect("Failed to create temporary path for the _path_for_rocksdb_storage3"); /// let path = tempdir.path(); /// { /// let db = DB::open_default(path).unwrap(); /// let snapshot = db.snapshot(); // Creates a longer-term snapshot of the DB, but closed when goes out of scope /// let mut iter = snapshot.iterator(IteratorMode::Start); // Make as many iterators as you'd like from one snapshot /// } /// let _ = DB::destroy(&Options::default(), path); /// ``` /// pub struct SnapshotWithThreadMode<'a, D: DBAccess> { db: &'a D, pub(crate) inner: *const ffi::rocksdb_snapshot_t, } impl<'a, D: DBAccess> SnapshotWithThreadMode<'a, D> { /// Creates a new `SnapshotWithThreadMode` of the database `db`. pub fn new(db: &'a D) -> Self { let snapshot = unsafe { db.create_snapshot() }; Self { db, inner: snapshot, } } /// Creates an iterator over the data in this snapshot, using the default read options. pub fn iterator(&self, mode: IteratorMode) -> DBIteratorWithThreadMode<'a, D> { let readopts = ReadOptions::default(); self.iterator_opt(mode, readopts) } /// Creates an iterator over the data in this snapshot under the given column family, using /// the default read options. pub fn iterator_cf( &self, cf_handle: &impl AsColumnFamilyRef, mode: IteratorMode, ) -> DBIteratorWithThreadMode { let readopts = ReadOptions::default(); self.iterator_cf_opt(cf_handle, readopts, mode) } /// Creates an iterator over the data in this snapshot, using the given read options. pub fn iterator_opt( &self, mode: IteratorMode, mut readopts: ReadOptions, ) -> DBIteratorWithThreadMode<'a, D> { readopts.set_snapshot(self); DBIteratorWithThreadMode::::new(self.db, readopts, mode) } /// Creates an iterator over the data in this snapshot under the given column family, using /// the given read options. pub fn iterator_cf_opt( &self, cf_handle: &impl AsColumnFamilyRef, mut readopts: ReadOptions, mode: IteratorMode, ) -> DBIteratorWithThreadMode { readopts.set_snapshot(self); DBIteratorWithThreadMode::new_cf(self.db, cf_handle.inner(), readopts, mode) } /// Creates a raw iterator over the data in this snapshot, using the default read options. pub fn raw_iterator(&self) -> DBRawIteratorWithThreadMode { let readopts = ReadOptions::default(); self.raw_iterator_opt(readopts) } /// Creates a raw iterator over the data in this snapshot under the given column family, using /// the default read options. pub fn raw_iterator_cf( &self, cf_handle: &impl AsColumnFamilyRef, ) -> DBRawIteratorWithThreadMode { let readopts = ReadOptions::default(); self.raw_iterator_cf_opt(cf_handle, readopts) } /// Creates a raw iterator over the data in this snapshot, using the given read options. pub fn raw_iterator_opt(&self, mut readopts: ReadOptions) -> DBRawIteratorWithThreadMode { readopts.set_snapshot(self); DBRawIteratorWithThreadMode::new(self.db, readopts) } /// Creates a raw iterator over the data in this snapshot under the given column family, using /// the given read options. pub fn raw_iterator_cf_opt( &self, cf_handle: &impl AsColumnFamilyRef, mut readopts: ReadOptions, ) -> DBRawIteratorWithThreadMode { readopts.set_snapshot(self); DBRawIteratorWithThreadMode::new_cf(self.db, cf_handle.inner(), readopts) } /// Returns the bytes associated with a key value with default read options. pub fn get>(&self, key: K) -> Result>, Error> { let readopts = ReadOptions::default(); self.get_opt(key, readopts) } /// Returns the bytes associated with a key value and given column family with default read /// options. pub fn get_cf>( &self, cf: &impl AsColumnFamilyRef, key: K, ) -> Result>, Error> { let readopts = ReadOptions::default(); self.get_cf_opt(cf, key.as_ref(), readopts) } /// Returns the bytes associated with a key value and given read options. pub fn get_opt>( &self, key: K, mut readopts: ReadOptions, ) -> Result>, Error> { readopts.set_snapshot(self); self.db.get_opt(key.as_ref(), &readopts) } /// Returns the bytes associated with a key value, given column family and read options. pub fn get_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, mut readopts: ReadOptions, ) -> Result>, Error> { readopts.set_snapshot(self); self.db.get_cf_opt(cf, key.as_ref(), &readopts) } /// Return the value associated with a key using RocksDB's PinnableSlice /// so as to avoid unnecessary memory copy. Similar to get_pinned_opt but /// leverages default options. pub fn get_pinned>(&self, key: K) -> Result, Error> { let readopts = ReadOptions::default(); self.get_pinned_opt(key, readopts) } /// Return the value associated with a key using RocksDB's PinnableSlice /// so as to avoid unnecessary memory copy. Similar to get_pinned_cf_opt but /// leverages default options. pub fn get_pinned_cf>( &self, cf: &impl AsColumnFamilyRef, key: K, ) -> Result, Error> { let readopts = ReadOptions::default(); self.get_pinned_cf_opt(cf, key.as_ref(), readopts) } /// Return the value associated with a key using RocksDB's PinnableSlice /// so as to avoid unnecessary memory copy. pub fn get_pinned_opt>( &self, key: K, mut readopts: ReadOptions, ) -> Result, Error> { readopts.set_snapshot(self); self.db.get_pinned_opt(key.as_ref(), &readopts) } /// Return the value associated with a key using RocksDB's PinnableSlice /// so as to avoid unnecessary memory copy. Similar to get_pinned_opt but /// allows specifying ColumnFamily. pub fn get_pinned_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, mut readopts: ReadOptions, ) -> Result, Error> { readopts.set_snapshot(self); self.db.get_pinned_cf_opt(cf, key.as_ref(), &readopts) } /// Returns the bytes associated with the given key values and default read options. pub fn multi_get, I>(&self, keys: I) -> Vec>, Error>> where I: IntoIterator, { let readopts = ReadOptions::default(); self.multi_get_opt(keys, readopts) } /// Returns the bytes associated with the given key values and default read options. pub fn multi_get_cf<'b, K, I, W>(&self, keys_cf: I) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, W: AsColumnFamilyRef + 'b, { let readopts = ReadOptions::default(); self.multi_get_cf_opt(keys_cf, readopts) } /// Returns the bytes associated with the given key values and given read options. pub fn multi_get_opt( &self, keys: I, mut readopts: ReadOptions, ) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, { readopts.set_snapshot(self); self.db.multi_get_opt(keys, &readopts) } /// Returns the bytes associated with the given key values, given column family and read options. pub fn multi_get_cf_opt<'b, K, I, W>( &self, keys_cf: I, mut readopts: ReadOptions, ) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, W: AsColumnFamilyRef + 'b, { readopts.set_snapshot(self); self.db.multi_get_cf_opt(keys_cf, &readopts) } } impl<'a, D: DBAccess> Drop for SnapshotWithThreadMode<'a, D> { fn drop(&mut self) { unsafe { self.db.release_snapshot(self.inner); } } } /// `Send` and `Sync` implementations for `SnapshotWithThreadMode` are safe, because `SnapshotWithThreadMode` is /// immutable and can be safely shared between threads. unsafe impl<'a, D: DBAccess> Send for SnapshotWithThreadMode<'a, D> {} unsafe impl<'a, D: DBAccess> Sync for SnapshotWithThreadMode<'a, D> {} rocksdb-0.23.0/src/sst_file_writer.rs000064400000000000000000000146551046102023000157110ustar 00000000000000// Copyright 2020 Lucjan Suski // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //` use crate::{ffi, ffi_util::to_cpath, Error, Options}; use libc::{self, c_char, size_t}; use std::{ffi::CString, marker::PhantomData, path::Path}; /// SstFileWriter is used to create sst files that can be added to database later /// All keys in files generated by SstFileWriter will have sequence number = 0. pub struct SstFileWriter<'a> { pub(crate) inner: *mut ffi::rocksdb_sstfilewriter_t, // Options are needed to be alive when calling open(), // so let's make sure it doesn't get, dropped for the lifetime of SstFileWriter phantom: PhantomData<&'a Options>, } unsafe impl<'a> Send for SstFileWriter<'a> {} unsafe impl<'a> Sync for SstFileWriter<'a> {} struct EnvOptions { inner: *mut ffi::rocksdb_envoptions_t, } impl Drop for EnvOptions { fn drop(&mut self) { unsafe { ffi::rocksdb_envoptions_destroy(self.inner); } } } impl Default for EnvOptions { fn default() -> Self { let opts = unsafe { ffi::rocksdb_envoptions_create() }; Self { inner: opts } } } impl<'a> SstFileWriter<'a> { /// Initializes SstFileWriter with given DB options. pub fn create(opts: &'a Options) -> Self { let env_options = EnvOptions::default(); let writer = Self::create_raw(opts, &env_options); Self { inner: writer, phantom: PhantomData, } } fn create_raw(opts: &Options, env_opts: &EnvOptions) -> *mut ffi::rocksdb_sstfilewriter_t { unsafe { ffi::rocksdb_sstfilewriter_create(env_opts.inner, opts.inner) } } /// Prepare SstFileWriter to write into file located at "file_path". pub fn open>(&'a self, path: P) -> Result<(), Error> { let cpath = to_cpath(&path)?; self.open_raw(&cpath) } fn open_raw(&'a self, cpath: &CString) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_sstfilewriter_open( self.inner, cpath.as_ptr() as *const _ )); Ok(()) } } /// Finalize writing to sst file and close file. pub fn finish(&mut self) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_sstfilewriter_finish(self.inner,)); Ok(()) } } /// returns the current file size pub fn file_size(&self) -> u64 { let mut file_size: u64 = 0; unsafe { ffi::rocksdb_sstfilewriter_file_size(self.inner, &mut file_size); } file_size } /// Adds a Put key with value to currently opened file /// REQUIRES: key is after any previously added key according to comparator. pub fn put(&mut self, key: K, value: V) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi_try!(ffi::rocksdb_sstfilewriter_put( self.inner, key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, )); Ok(()) } } /// Adds a Put key with value to currently opened file /// REQUIRES: key is after any previously added key according to comparator. pub fn put_with_ts(&mut self, key: K, ts: S, value: V) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, S: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); let ts = ts.as_ref(); unsafe { ffi_try!(ffi::rocksdb_sstfilewriter_put_with_ts( self.inner, key.as_ptr() as *const c_char, key.len() as size_t, ts.as_ptr() as *const c_char, ts.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, )); Ok(()) } } /// Adds a Merge key with value to currently opened file /// REQUIRES: key is after any previously added key according to comparator. pub fn merge(&mut self, key: K, value: V) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi_try!(ffi::rocksdb_sstfilewriter_merge( self.inner, key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, )); Ok(()) } } /// Adds a deletion key to currently opened file /// REQUIRES: key is after any previously added key according to comparator. pub fn delete>(&mut self, key: K) -> Result<(), Error> { let key = key.as_ref(); unsafe { ffi_try!(ffi::rocksdb_sstfilewriter_delete( self.inner, key.as_ptr() as *const c_char, key.len() as size_t, )); Ok(()) } } /// Adds a deletion key to currently opened file /// REQUIRES: key is after any previously added key according to comparator. pub fn delete_with_ts, S: AsRef<[u8]>>( &mut self, key: K, ts: S, ) -> Result<(), Error> { let key = key.as_ref(); let ts = ts.as_ref(); unsafe { ffi_try!(ffi::rocksdb_sstfilewriter_delete_with_ts( self.inner, key.as_ptr() as *const c_char, key.len() as size_t, ts.as_ptr() as *const c_char, ts.len() as size_t, )); Ok(()) } } } impl<'a> Drop for SstFileWriter<'a> { fn drop(&mut self) { unsafe { ffi::rocksdb_sstfilewriter_destroy(self.inner); } } } rocksdb-0.23.0/src/statistics.rs000064400000000000000000000122431046102023000146660ustar 00000000000000use crate::ffi; #[derive(Debug, Clone)] pub struct NameParseError; impl core::fmt::Display for NameParseError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "unrecognized name") } } impl std::error::Error for NameParseError {} // Helper macro to generate iterable nums that translate into static strings mapped from the cpp // land. macro_rules! iterable_named_enum { ( $(#[$m:meta])* $type_vis:vis enum $typename:ident { $( $(#[$variant_meta:meta])* $variant:ident($variant_str:literal) $(= $value:expr)?, )+ } ) => { // Main Type #[allow(clippy::all)] $(#[$m])* $type_vis enum $typename { $( $(#[$variant_meta])* $variant$( = $value)?, )+ } #[automatically_derived] impl $typename { #[doc = "The corresponding rocksdb string identifier for this variant"] pub const fn name(&self) -> &'static str { match self { $( $typename::$variant => $variant_str, )+ } } pub fn iter() -> ::core::slice::Iter<'static, $typename> { static VARIANTS: &'static [$typename] = &[ $( $typename::$variant, )+ ]; VARIANTS.iter() } } #[automatically_derived] impl ::core::str::FromStr for $typename { type Err = NameParseError; fn from_str(s: &str) -> Result { match s { $( $variant_str => Ok($typename::$variant), )+ _ => Err(NameParseError), } } } #[automatically_derived] impl ::core::fmt::Display for $typename { fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { self.name().fmt(f) } } }; } /// StatsLevel can be used to reduce statistics overhead by skipping certain /// types of stats in the stats collection process. #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[repr(u8)] pub enum StatsLevel { /// Disable all metrics DisableAll = 0, /// Disable timer stats, and skip histogram stats ExceptHistogramOrTimers = 2, /// Skip timer stats ExceptTimers, /// Collect all stats except time inside mutex lock AND time spent on /// compression. ExceptDetailedTimers, /// Collect all stats except the counters requiring to get time inside the /// mutex lock. ExceptTimeForMutex, /// Collect all stats, including measuring duration of mutex operations. /// If getting time is expensive on the platform to run, it can /// reduce scalability to more threads, especially for writes. All, } include!("statistics_enum_ticker.rs"); include!("statistics_enum_histogram.rs"); pub struct HistogramData { pub(crate) inner: *mut ffi::rocksdb_statistics_histogram_data_t, } impl HistogramData { pub fn new() -> HistogramData { HistogramData::default() } pub fn median(&self) -> f64 { unsafe { ffi::rocksdb_statistics_histogram_data_get_median(self.inner) } } pub fn average(&self) -> f64 { unsafe { ffi::rocksdb_statistics_histogram_data_get_average(self.inner) } } pub fn p95(&self) -> f64 { unsafe { ffi::rocksdb_statistics_histogram_data_get_p95(self.inner) } } pub fn p99(&self) -> f64 { unsafe { ffi::rocksdb_statistics_histogram_data_get_p99(self.inner) } } pub fn max(&self) -> f64 { unsafe { ffi::rocksdb_statistics_histogram_data_get_max(self.inner) } } pub fn min(&self) -> f64 { unsafe { ffi::rocksdb_statistics_histogram_data_get_min(self.inner) } } pub fn sum(&self) -> u64 { unsafe { ffi::rocksdb_statistics_histogram_data_get_sum(self.inner) } } pub fn count(&self) -> u64 { unsafe { ffi::rocksdb_statistics_histogram_data_get_count(self.inner) } } pub fn std_dev(&self) -> f64 { unsafe { ffi::rocksdb_statistics_histogram_data_get_std_dev(self.inner) } } } impl Default for HistogramData { fn default() -> Self { let histogram_data_inner = unsafe { ffi::rocksdb_statistics_histogram_data_create() }; assert!( !histogram_data_inner.is_null(), "Could not create RocksDB histogram data" ); Self { inner: histogram_data_inner, } } } impl Drop for HistogramData { fn drop(&mut self) { unsafe { ffi::rocksdb_statistics_histogram_data_destroy(self.inner); } } } #[test] fn sanity_checks() { let want = "rocksdb.async.read.bytes"; assert_eq!(want, Histogram::AsyncReadBytes.name()); let want = "rocksdb.block.cache.index.miss"; assert_eq!(want, Ticker::BlockCacheIndexMiss.to_string()); // assert enum lengths assert_eq!(Ticker::iter().count(), 211 /* TICKER_ENUM_MAX */); assert_eq!(Histogram::iter().count(), 62 /* HISTOGRAM_ENUM_MAX */); } rocksdb-0.23.0/src/statistics_enum_histogram.rs000064400000000000000000000102151046102023000177640ustar 00000000000000// **** DO NOT modify this file! **** // This file is generated by cmd: // gen_statistics.bash rocksdb/monitoring/statistics.cc Histogram iterable_named_enum! { #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[repr(u32)] pub enum Histogram { DbGet("rocksdb.db.get.micros"), DbWrite("rocksdb.db.write.micros"), CompactionTime("rocksdb.compaction.times.micros"), CompactionCpuTime("rocksdb.compaction.times.cpu_micros"), SubcompactionSetupTime("rocksdb.subcompaction.setup.times.micros"), TableSyncMicros("rocksdb.table.sync.micros"), CompactionOutfileSyncMicros("rocksdb.compaction.outfile.sync.micros"), WalFileSyncMicros("rocksdb.wal.file.sync.micros"), ManifestFileSyncMicros("rocksdb.manifest.file.sync.micros"), TableOpenIoMicros("rocksdb.table.open.io.micros"), DbMultiget("rocksdb.db.multiget.micros"), ReadBlockCompactionMicros("rocksdb.read.block.compaction.micros"), ReadBlockGetMicros("rocksdb.read.block.get.micros"), WriteRawBlockMicros("rocksdb.write.raw.block.micros"), NumFilesInSingleCompaction("rocksdb.numfiles.in.singlecompaction"), DbSeek("rocksdb.db.seek.micros"), WriteStall("rocksdb.db.write.stall"), SstReadMicros("rocksdb.sst.read.micros"), FileReadFlushMicros("rocksdb.file.read.flush.micros"), FileReadCompactionMicros("rocksdb.file.read.compaction.micros"), FileReadDbOpenMicros("rocksdb.file.read.db.open.micros"), FileReadGetMicros("rocksdb.file.read.get.micros"), FileReadMultigetMicros("rocksdb.file.read.multiget.micros"), FileReadDbIteratorMicros("rocksdb.file.read.db.iterator.micros"), FileReadVerifyDbChecksumMicros("rocksdb.file.read.verify.db.checksum.micros"), FileReadVerifyFileChecksumsMicros("rocksdb.file.read.verify.file.checksums.micros"), SstWriteMicros("rocksdb.sst.write.micros"), FileWriteFlushMicros("rocksdb.file.write.flush.micros"), FileWriteCompactionMicros("rocksdb.file.write.compaction.micros"), FileWriteDbOpenMicros("rocksdb.file.write.db.open.micros"), NumSubcompactionsScheduled("rocksdb.num.subcompactions.scheduled"), BytesPerRead("rocksdb.bytes.per.read"), BytesPerWrite("rocksdb.bytes.per.write"), BytesPerMultiget("rocksdb.bytes.per.multiget"), CompressionTimesNanos("rocksdb.compression.times.nanos"), DecompressionTimesNanos("rocksdb.decompression.times.nanos"), ReadNumMergeOperands("rocksdb.read.num.merge_operands"), BlobDbKeySize("rocksdb.blobdb.key.size"), BlobDbValueSize("rocksdb.blobdb.value.size"), BlobDbWriteMicros("rocksdb.blobdb.write.micros"), BlobDbGetMicros("rocksdb.blobdb.get.micros"), BlobDbMultigetMicros("rocksdb.blobdb.multiget.micros"), BlobDbSeekMicros("rocksdb.blobdb.seek.micros"), BlobDbNextMicros("rocksdb.blobdb.next.micros"), BlobDbPrevMicros("rocksdb.blobdb.prev.micros"), BlobDbBlobFileWriteMicros("rocksdb.blobdb.blob.file.write.micros"), BlobDbBlobFileReadMicros("rocksdb.blobdb.blob.file.read.micros"), BlobDbBlobFileSyncMicros("rocksdb.blobdb.blob.file.sync.micros"), BlobDbCompressionMicros("rocksdb.blobdb.compression.micros"), BlobDbDecompressionMicros("rocksdb.blobdb.decompression.micros"), FlushTime("rocksdb.db.flush.micros"), SstBatchSize("rocksdb.sst.batch.size"), MultigetIoBatchSize("rocksdb.multiget.io.batch.size"), NumIndexAndFilterBlocksReadPerLevel("rocksdb.num.index.and.filter.blocks.read.per.level"), NumSstReadPerLevel("rocksdb.num.sst.read.per.level"), NumLevelReadPerMultiget("rocksdb.num.level.read.per.multiget"), ErrorHandlerAutoresumeRetryCount("rocksdb.error.handler.autoresume.retry.count"), AsyncReadBytes("rocksdb.async.read.bytes"), PollWaitMicros("rocksdb.poll.wait.micros"), PrefetchedBytesDiscarded("rocksdb.prefetched.bytes.discarded"), AsyncPrefetchAbortMicros("rocksdb.async.prefetch.abort.micros"), TableOpenPrefetchTailReadBytes("rocksdb.table.open.prefetch.tail.read.bytes"), } } rocksdb-0.23.0/src/statistics_enum_ticker.rs000064400000000000000000000336551046102023000172650ustar 00000000000000// **** DO NOT modify this file! **** // This file is generated by cmd: // gen_statistics.bash rocksdb/monitoring/statistics.cc Ticker iterable_named_enum! { #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[repr(u32)] pub enum Ticker { BlockCacheMiss("rocksdb.block.cache.miss"), BlockCacheHit("rocksdb.block.cache.hit"), BlockCacheAdd("rocksdb.block.cache.add"), BlockCacheAddFailures("rocksdb.block.cache.add.failures"), BlockCacheIndexMiss("rocksdb.block.cache.index.miss"), BlockCacheIndexHit("rocksdb.block.cache.index.hit"), BlockCacheIndexAdd("rocksdb.block.cache.index.add"), BlockCacheIndexBytesInsert("rocksdb.block.cache.index.bytes.insert"), BlockCacheFilterMiss("rocksdb.block.cache.filter.miss"), BlockCacheFilterHit("rocksdb.block.cache.filter.hit"), BlockCacheFilterAdd("rocksdb.block.cache.filter.add"), BlockCacheFilterBytesInsert("rocksdb.block.cache.filter.bytes.insert"), BlockCacheDataMiss("rocksdb.block.cache.data.miss"), BlockCacheDataHit("rocksdb.block.cache.data.hit"), BlockCacheDataAdd("rocksdb.block.cache.data.add"), BlockCacheDataBytesInsert("rocksdb.block.cache.data.bytes.insert"), BlockCacheBytesRead("rocksdb.block.cache.bytes.read"), BlockCacheBytesWrite("rocksdb.block.cache.bytes.write"), BlockCacheCompressionDictMiss("rocksdb.block.cache.compression.dict.miss"), BlockCacheCompressionDictHit("rocksdb.block.cache.compression.dict.hit"), BlockCacheCompressionDictAdd("rocksdb.block.cache.compression.dict.add"), BlockCacheCompressionDictBytesInsert("rocksdb.block.cache.compression.dict.bytes.insert"), BlockCacheAddRedundant("rocksdb.block.cache.add.redundant"), BlockCacheIndexAddRedundant("rocksdb.block.cache.index.add.redundant"), BlockCacheFilterAddRedundant("rocksdb.block.cache.filter.add.redundant"), BlockCacheDataAddRedundant("rocksdb.block.cache.data.add.redundant"), BlockCacheCompressionDictAddRedundant("rocksdb.block.cache.compression.dict.add.redundant"), SecondaryCacheHits("rocksdb.secondary.cache.hits"), SecondaryCacheFilterHits("rocksdb.secondary.cache.filter.hits"), SecondaryCacheIndexHits("rocksdb.secondary.cache.index.hits"), SecondaryCacheDataHits("rocksdb.secondary.cache.data.hits"), CompressedSecondaryCacheDummyHits("rocksdb.compressed.secondary.cache.dummy.hits"), CompressedSecondaryCacheHits("rocksdb.compressed.secondary.cache.hits"), CompressedSecondaryCachePromotions("rocksdb.compressed.secondary.cache.promotions"), CompressedSecondaryCachePromotionSkips("rocksdb.compressed.secondary.cache.promotion.skips"), BloomFilterUseful("rocksdb.bloom.filter.useful"), BloomFilterFullPositive("rocksdb.bloom.filter.full.positive"), BloomFilterFullTruePositive("rocksdb.bloom.filter.full.true.positive"), BloomFilterPrefixChecked("rocksdb.bloom.filter.prefix.checked"), BloomFilterPrefixUseful("rocksdb.bloom.filter.prefix.useful"), BloomFilterPrefixTruePositive("rocksdb.bloom.filter.prefix.true.positive"), PersistentCacheHit("rocksdb.persistent.cache.hit"), PersistentCacheMiss("rocksdb.persistent.cache.miss"), SimBlockCacheHit("rocksdb.sim.block.cache.hit"), SimBlockCacheMiss("rocksdb.sim.block.cache.miss"), MemtableHit("rocksdb.memtable.hit"), MemtableMiss("rocksdb.memtable.miss"), GetHitL0("rocksdb.l0.hit"), GetHitL1("rocksdb.l1.hit"), GetHitL2AndUp("rocksdb.l2andup.hit"), CompactionKeyDropNewerEntry("rocksdb.compaction.key.drop.new"), CompactionKeyDropObsolete("rocksdb.compaction.key.drop.obsolete"), CompactionKeyDropRangeDel("rocksdb.compaction.key.drop.range_del"), CompactionKeyDropUser("rocksdb.compaction.key.drop.user"), CompactionRangeDelDropObsolete("rocksdb.compaction.range_del.drop.obsolete"), CompactionOptimizedDelDropObsolete("rocksdb.compaction.optimized.del.drop.obsolete"), CompactionCancelled("rocksdb.compaction.cancelled"), NumberKeysWritten("rocksdb.number.keys.written"), NumberKeysRead("rocksdb.number.keys.read"), NumberKeysUpdated("rocksdb.number.keys.updated"), BytesWritten("rocksdb.bytes.written"), BytesRead("rocksdb.bytes.read"), NumberDbSeek("rocksdb.number.db.seek"), NumberDbNext("rocksdb.number.db.next"), NumberDbPrev("rocksdb.number.db.prev"), NumberDbSeekFound("rocksdb.number.db.seek.found"), NumberDbNextFound("rocksdb.number.db.next.found"), NumberDbPrevFound("rocksdb.number.db.prev.found"), IterBytesRead("rocksdb.db.iter.bytes.read"), NumberIterSkip("rocksdb.number.iter.skip"), NumberOfReseeksInIteration("rocksdb.number.reseeks.iteration"), NoIteratorCreated("rocksdb.num.iterator.created"), NoIteratorDeleted("rocksdb.num.iterator.deleted"), NoFileOpens("rocksdb.no.file.opens"), NoFileErrors("rocksdb.no.file.errors"), StallMicros("rocksdb.stall.micros"), DbMutexWaitMicros("rocksdb.db.mutex.wait.micros"), NumberMultigetCalls("rocksdb.number.multiget.get"), NumberMultigetKeysRead("rocksdb.number.multiget.keys.read"), NumberMultigetBytesRead("rocksdb.number.multiget.bytes.read"), NumberMultigetKeysFound("rocksdb.number.multiget.keys.found"), NumberMergeFailures("rocksdb.number.merge.failures"), GetUpdatesSinceCalls("rocksdb.getupdatessince.calls"), WalFileSynced("rocksdb.wal.synced"), WalFileBytes("rocksdb.wal.bytes"), WriteDoneBySelf("rocksdb.write.self"), WriteDoneByOther("rocksdb.write.other"), WriteWithWal("rocksdb.write.wal"), CompactReadBytes("rocksdb.compact.read.bytes"), CompactWriteBytes("rocksdb.compact.write.bytes"), FlushWriteBytes("rocksdb.flush.write.bytes"), CompactReadBytesMarked("rocksdb.compact.read.marked.bytes"), CompactReadBytesPeriodic("rocksdb.compact.read.periodic.bytes"), CompactReadBytesTtl("rocksdb.compact.read.ttl.bytes"), CompactWriteBytesMarked("rocksdb.compact.write.marked.bytes"), CompactWriteBytesPeriodic("rocksdb.compact.write.periodic.bytes"), CompactWriteBytesTtl("rocksdb.compact.write.ttl.bytes"), NumberDirectLoadTableProperties("rocksdb.number.direct.load.table.properties"), NumberSuperversionAcquires("rocksdb.number.superversion_acquires"), NumberSuperversionReleases("rocksdb.number.superversion_releases"), NumberSuperversionCleanups("rocksdb.number.superversion_cleanups"), NumberBlockCompressed("rocksdb.number.block.compressed"), NumberBlockDecompressed("rocksdb.number.block.decompressed"), BytesCompressedFrom("rocksdb.bytes.compressed.from"), BytesCompressedTo("rocksdb.bytes.compressed.to"), BytesCompressionBypassed("rocksdb.bytes.compression_bypassed"), BytesCompressionRejected("rocksdb.bytes.compression.rejected"), NumberBlockCompressionBypassed("rocksdb.number.block_compression_bypassed"), NumberBlockCompressionRejected("rocksdb.number.block_compression_rejected"), BytesDecompressedFrom("rocksdb.bytes.decompressed.from"), BytesDecompressedTo("rocksdb.bytes.decompressed.to"), MergeOperationTotalTime("rocksdb.merge.operation.time.nanos"), FilterOperationTotalTime("rocksdb.filter.operation.time.nanos"), CompactionCpuTotalTime("rocksdb.compaction.total.time.cpu_micros"), RowCacheHit("rocksdb.row.cache.hit"), RowCacheMiss("rocksdb.row.cache.miss"), ReadAmpEstimateUsefulBytes("rocksdb.read.amp.estimate.useful.bytes"), ReadAmpTotalReadBytes("rocksdb.read.amp.total.read.bytes"), NumberRateLimiterDrains("rocksdb.number.rate_limiter.drains"), BlobDbNumPut("rocksdb.blobdb.num.put"), BlobDbNumWrite("rocksdb.blobdb.num.write"), BlobDbNumGet("rocksdb.blobdb.num.get"), BlobDbNumMultiget("rocksdb.blobdb.num.multiget"), BlobDbNumSeek("rocksdb.blobdb.num.seek"), BlobDbNumNext("rocksdb.blobdb.num.next"), BlobDbNumPrev("rocksdb.blobdb.num.prev"), BlobDbNumKeysWritten("rocksdb.blobdb.num.keys.written"), BlobDbNumKeysRead("rocksdb.blobdb.num.keys.read"), BlobDbBytesWritten("rocksdb.blobdb.bytes.written"), BlobDbBytesRead("rocksdb.blobdb.bytes.read"), BlobDbWriteInlined("rocksdb.blobdb.write.inlined"), BlobDbWriteInlinedTtl("rocksdb.blobdb.write.inlined.ttl"), BlobDbWriteBlob("rocksdb.blobdb.write.blob"), BlobDbWriteBlobTtl("rocksdb.blobdb.write.blob.ttl"), BlobDbBlobFileBytesWritten("rocksdb.blobdb.blob.file.bytes.written"), BlobDbBlobFileBytesRead("rocksdb.blobdb.blob.file.bytes.read"), BlobDbBlobFileSynced("rocksdb.blobdb.blob.file.synced"), BlobDbBlobIndexExpiredCount("rocksdb.blobdb.blob.index.expired.count"), BlobDbBlobIndexExpiredSize("rocksdb.blobdb.blob.index.expired.size"), BlobDbBlobIndexEvictedCount("rocksdb.blobdb.blob.index.evicted.count"), BlobDbBlobIndexEvictedSize("rocksdb.blobdb.blob.index.evicted.size"), BlobDbGcNumFiles("rocksdb.blobdb.gc.num.files"), BlobDbGcNumNewFiles("rocksdb.blobdb.gc.num.new.files"), BlobDbGcFailures("rocksdb.blobdb.gc.failures"), BlobDbGcNumKeysRelocated("rocksdb.blobdb.gc.num.keys.relocated"), BlobDbGcBytesRelocated("rocksdb.blobdb.gc.bytes.relocated"), BlobDbFifoNumFilesEvicted("rocksdb.blobdb.fifo.num.files.evicted"), BlobDbFifoNumKeysEvicted("rocksdb.blobdb.fifo.num.keys.evicted"), BlobDbFifoBytesEvicted("rocksdb.blobdb.fifo.bytes.evicted"), BlobDbCacheMiss("rocksdb.blobdb.cache.miss"), BlobDbCacheHit("rocksdb.blobdb.cache.hit"), BlobDbCacheAdd("rocksdb.blobdb.cache.add"), BlobDbCacheAddFailures("rocksdb.blobdb.cache.add.failures"), BlobDbCacheBytesRead("rocksdb.blobdb.cache.bytes.read"), BlobDbCacheBytesWrite("rocksdb.blobdb.cache.bytes.write"), TxnPrepareMutexOverhead("rocksdb.txn.overhead.mutex.prepare"), TxnOldCommitMapMutexOverhead("rocksdb.txn.overhead.mutex.old.commit.map"), TxnDuplicateKeyOverhead("rocksdb.txn.overhead.duplicate.key"), TxnSnapshotMutexOverhead("rocksdb.txn.overhead.mutex.snapshot"), TxnGetTryAgain("rocksdb.txn.get.tryagain"), FilesMarkedTrash("rocksdb.files.marked.trash"), FilesDeletedFromTrashQueue("rocksdb.files.marked.trash.deleted"), FilesDeletedImmediately("rocksdb.files.deleted.immediately"), ErrorHandlerBgErrorCount("rocksdb.error.handler.bg.error.count"), ErrorHandlerBgIoErrorCount("rocksdb.error.handler.bg.io.error.count"), ErrorHandlerBgRetryableIoErrorCount("rocksdb.error.handler.bg.retryable.io.error.count"), ErrorHandlerAutoresumeCount("rocksdb.error.handler.autoresume.count"), ErrorHandlerAutoresumeRetryTotalCount("rocksdb.error.handler.autoresume.retry.total.count"), ErrorHandlerAutoresumeSuccessCount("rocksdb.error.handler.autoresume.success.count"), MemtablePayloadBytesAtFlush("rocksdb.memtable.payload.bytes.at.flush"), MemtableGarbageBytesAtFlush("rocksdb.memtable.garbage.bytes.at.flush"), VerifyChecksumReadBytes("rocksdb.verify_checksum.read.bytes"), BackupReadBytes("rocksdb.backup.read.bytes"), BackupWriteBytes("rocksdb.backup.write.bytes"), RemoteCompactReadBytes("rocksdb.remote.compact.read.bytes"), RemoteCompactWriteBytes("rocksdb.remote.compact.write.bytes"), HotFileReadBytes("rocksdb.hot.file.read.bytes"), WarmFileReadBytes("rocksdb.warm.file.read.bytes"), ColdFileReadBytes("rocksdb.cold.file.read.bytes"), HotFileReadCount("rocksdb.hot.file.read.count"), WarmFileReadCount("rocksdb.warm.file.read.count"), ColdFileReadCount("rocksdb.cold.file.read.count"), LastLevelReadBytes("rocksdb.last.level.read.bytes"), LastLevelReadCount("rocksdb.last.level.read.count"), NonLastLevelReadBytes("rocksdb.non.last.level.read.bytes"), NonLastLevelReadCount("rocksdb.non.last.level.read.count"), LastLevelSeekFiltered("rocksdb.last.level.seek.filtered"), LastLevelSeekFilterMatch("rocksdb.last.level.seek.filter.match"), LastLevelSeekData("rocksdb.last.level.seek.data"), LastLevelSeekDataUsefulNoFilter("rocksdb.last.level.seek.data.useful.no.filter"), LastLevelSeekDataUsefulFilterMatch("rocksdb.last.level.seek.data.useful.filter.match"), NonLastLevelSeekFiltered("rocksdb.non.last.level.seek.filtered"), NonLastLevelSeekFilterMatch("rocksdb.non.last.level.seek.filter.match"), NonLastLevelSeekData("rocksdb.non.last.level.seek.data"), NonLastLevelSeekDataUsefulNoFilter("rocksdb.non.last.level.seek.data.useful.no.filter"), NonLastLevelSeekDataUsefulFilterMatch("rocksdb.non.last.level.seek.data.useful.filter.match"), BlockChecksumComputeCount("rocksdb.block.checksum.compute.count"), BlockChecksumMismatchCount("rocksdb.block.checksum.mismatch.count"), MultigetCoroutineCount("rocksdb.multiget.coroutine.count"), ReadAsyncMicros("rocksdb.read.async.micros"), AsyncReadErrorCount("rocksdb.async.read.error.count"), TableOpenPrefetchTailMiss("rocksdb.table.open.prefetch.tail.miss"), TableOpenPrefetchTailHit("rocksdb.table.open.prefetch.tail.hit"), TimestampFilterTableChecked("rocksdb.timestamp.filter.table.checked"), TimestampFilterTableFiltered("rocksdb.timestamp.filter.table.filtered"), ReadaheadTrimmed("rocksdb.readahead.trimmed"), FifoMaxSizeCompactions("rocksdb.fifo.max.size.compactions"), FifoTtlCompactions("rocksdb.fifo.ttl.compactions"), PrefetchBytes("rocksdb.prefetch.bytes"), PrefetchBytesUseful("rocksdb.prefetch.bytes.useful"), PrefetchHits("rocksdb.prefetch.hits"), } } rocksdb-0.23.0/src/transactions/mod.rs000064400000000000000000000015751046102023000157710ustar 00000000000000// Copyright 2021 Yiyuan Liu // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // mod optimistic_transaction_db; mod options; mod transaction; mod transaction_db; pub use optimistic_transaction_db::OptimisticTransactionDB; pub use options::{OptimisticTransactionOptions, TransactionDBOptions, TransactionOptions}; pub use transaction::Transaction; pub use transaction_db::TransactionDB; rocksdb-0.23.0/src/transactions/optimistic_transaction_db.rs000064400000000000000000000263461046102023000224530ustar 00000000000000// Copyright 2021 Yiyuan Liu // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // use std::{collections::BTreeMap, ffi::CString, fs, iter, marker::PhantomData, path::Path, ptr}; use libc::{c_char, c_int, size_t}; use crate::column_family::ColumnFamilyTtl; use crate::{ db::{DBCommon, DBInner}, ffi, ffi_util::to_cpath, write_batch::WriteBatchWithTransaction, AsColumnFamilyRef, ColumnFamilyDescriptor, Error, OptimisticTransactionOptions, Options, ThreadMode, Transaction, WriteOptions, DEFAULT_COLUMN_FAMILY_NAME, }; /// A type alias to RocksDB Optimistic Transaction DB. /// /// Please read the official /// [guide](https://github.com/facebook/rocksdb/wiki/Transactions#optimistictransactiondb) /// to learn more about RocksDB OptimisticTransactionDB. /// /// The default thread mode for [`OptimisticTransactionDB`] is [`SingleThreaded`] /// if feature `multi-threaded-cf` is not enabled. /// /// See [`DBCommon`] for full list of methods. /// /// # Examples /// /// ``` /// use rocksdb::{DB, Options, OptimisticTransactionDB, SingleThreaded}; /// let tempdir = tempfile::Builder::new() /// .prefix("_path_for_optimistic_transaction_db") /// .tempdir() /// .expect("Failed to create temporary path for the _path_for_optimistic_transaction_db"); /// let path = tempdir.path(); /// { /// let db: OptimisticTransactionDB = OptimisticTransactionDB::open_default(path).unwrap(); /// db.put(b"my key", b"my value").unwrap(); /// /// // create transaction /// let txn = db.transaction(); /// txn.put(b"key2", b"value2"); /// txn.put(b"key3", b"value3"); /// txn.commit().unwrap(); /// } /// let _ = DB::destroy(&Options::default(), path); /// ``` /// /// [`SingleThreaded`]: crate::SingleThreaded #[cfg(not(feature = "multi-threaded-cf"))] pub type OptimisticTransactionDB = DBCommon; #[cfg(feature = "multi-threaded-cf")] pub type OptimisticTransactionDB = DBCommon; pub struct OptimisticTransactionDBInner { base: *mut ffi::rocksdb_t, db: *mut ffi::rocksdb_optimistictransactiondb_t, } impl DBInner for OptimisticTransactionDBInner { fn inner(&self) -> *mut ffi::rocksdb_t { self.base } } impl Drop for OptimisticTransactionDBInner { fn drop(&mut self) { unsafe { ffi::rocksdb_optimistictransactiondb_close_base_db(self.base); ffi::rocksdb_optimistictransactiondb_close(self.db); } } } /// Methods of `OptimisticTransactionDB`. impl OptimisticTransactionDB { /// Opens a database with default options. pub fn open_default>(path: P) -> Result { let mut opts = Options::default(); opts.create_if_missing(true); Self::open(&opts, path) } /// Opens the database with the specified options. pub fn open>(opts: &Options, path: P) -> Result { Self::open_cf(opts, path, None::<&str>) } /// Opens a database with the given database options and column family names. /// /// Column families opened using this function will be created with default `Options`. /// *NOTE*: `default` column family will be opened with the `Options::default()`. /// If you want to open `default` column family with custom options, use `open_cf_descriptors` and /// provide a `ColumnFamilyDescriptor` with the desired options. pub fn open_cf(opts: &Options, path: P, cfs: I) -> Result where P: AsRef, I: IntoIterator, N: AsRef, { let cfs = cfs .into_iter() .map(|name| ColumnFamilyDescriptor::new(name.as_ref(), Options::default())); Self::open_cf_descriptors_internal(opts, path, cfs) } /// Opens a database with the given database options and column family descriptors. pub fn open_cf_descriptors(opts: &Options, path: P, cfs: I) -> Result where P: AsRef, I: IntoIterator, { Self::open_cf_descriptors_internal(opts, path, cfs) } /// Internal implementation for opening RocksDB. fn open_cf_descriptors_internal(opts: &Options, path: P, cfs: I) -> Result where P: AsRef, I: IntoIterator, { let cfs: Vec<_> = cfs.into_iter().collect(); let outlive = iter::once(opts.outlive.clone()) .chain(cfs.iter().map(|cf| cf.options.outlive.clone())) .collect(); let cpath = to_cpath(&path)?; if let Err(e) = fs::create_dir_all(&path) { return Err(Error::new(format!( "Failed to create RocksDB directory: `{e:?}`." ))); } let db: *mut ffi::rocksdb_optimistictransactiondb_t; let mut cf_map = BTreeMap::new(); if cfs.is_empty() { db = Self::open_raw(opts, &cpath)?; } else { let mut cfs_v = cfs; // Always open the default column family. if !cfs_v.iter().any(|cf| cf.name == DEFAULT_COLUMN_FAMILY_NAME) { cfs_v.push(ColumnFamilyDescriptor { name: String::from(DEFAULT_COLUMN_FAMILY_NAME), options: Options::default(), ttl: ColumnFamilyTtl::SameAsDb, }); } // We need to store our CStrings in an intermediate vector // so that their pointers remain valid. let c_cfs: Vec = cfs_v .iter() .map(|cf| CString::new(cf.name.as_bytes()).unwrap()) .collect(); let cfnames: Vec<_> = c_cfs.iter().map(|cf| cf.as_ptr()).collect(); // These handles will be populated by DB. let mut cfhandles: Vec<_> = cfs_v.iter().map(|_| ptr::null_mut()).collect(); let cfopts: Vec<_> = cfs_v .iter() .map(|cf| cf.options.inner.cast_const()) .collect(); db = Self::open_cf_raw(opts, &cpath, &cfs_v, &cfnames, &cfopts, &mut cfhandles)?; for handle in &cfhandles { if handle.is_null() { return Err(Error::new( "Received null column family handle from DB.".to_owned(), )); } } for (cf_desc, inner) in cfs_v.iter().zip(cfhandles) { cf_map.insert(cf_desc.name.clone(), inner); } } if db.is_null() { return Err(Error::new("Could not initialize database.".to_owned())); } let base = unsafe { ffi::rocksdb_optimistictransactiondb_get_base_db(db) }; if base.is_null() { unsafe { ffi::rocksdb_optimistictransactiondb_close(db); } return Err(Error::new("Could not initialize database.".to_owned())); } let inner = OptimisticTransactionDBInner { base, db }; Ok(Self::new( inner, T::new_cf_map_internal(cf_map), path.as_ref().to_path_buf(), outlive, )) } fn open_raw( opts: &Options, cpath: &CString, ) -> Result<*mut ffi::rocksdb_optimistictransactiondb_t, Error> { unsafe { let db = ffi_try!(ffi::rocksdb_optimistictransactiondb_open( opts.inner, cpath.as_ptr() )); Ok(db) } } fn open_cf_raw( opts: &Options, cpath: &CString, cfs_v: &[ColumnFamilyDescriptor], cfnames: &[*const c_char], cfopts: &[*const ffi::rocksdb_options_t], cfhandles: &mut [*mut ffi::rocksdb_column_family_handle_t], ) -> Result<*mut ffi::rocksdb_optimistictransactiondb_t, Error> { unsafe { let db = ffi_try!(ffi::rocksdb_optimistictransactiondb_open_column_families( opts.inner, cpath.as_ptr(), cfs_v.len() as c_int, cfnames.as_ptr(), cfopts.as_ptr(), cfhandles.as_mut_ptr(), )); Ok(db) } } /// Creates a transaction with default options. pub fn transaction(&self) -> Transaction { self.transaction_opt( &WriteOptions::default(), &OptimisticTransactionOptions::default(), ) } /// Creates a transaction with default options. pub fn transaction_opt( &self, writeopts: &WriteOptions, otxn_opts: &OptimisticTransactionOptions, ) -> Transaction { Transaction { inner: unsafe { ffi::rocksdb_optimistictransaction_begin( self.inner.db, writeopts.inner, otxn_opts.inner, std::ptr::null_mut(), ) }, _marker: PhantomData, } } pub fn write_opt( &self, batch: WriteBatchWithTransaction, writeopts: &WriteOptions, ) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_optimistictransactiondb_write( self.inner.db, writeopts.inner, batch.inner )); } Ok(()) } pub fn write(&self, batch: WriteBatchWithTransaction) -> Result<(), Error> { self.write_opt(batch, &WriteOptions::default()) } pub fn write_without_wal(&self, batch: WriteBatchWithTransaction) -> Result<(), Error> { let mut wo = WriteOptions::new(); wo.disable_wal(true); self.write_opt(batch, &wo) } /// Removes the database entries in the range `["from", "to")` using given write options. pub fn delete_range_cf_opt>( &self, cf: &impl AsColumnFamilyRef, from: K, to: K, writeopts: &WriteOptions, ) -> Result<(), Error> { let from = from.as_ref(); let to = to.as_ref(); unsafe { ffi_try!(ffi::rocksdb_delete_range_cf( self.inner.inner(), writeopts.inner, cf.inner(), from.as_ptr() as *const c_char, from.len() as size_t, to.as_ptr() as *const c_char, to.len() as size_t, )); Ok(()) } } /// Removes the database entries in the range `["from", "to")` using default write options. pub fn delete_range_cf>( &self, cf: &impl AsColumnFamilyRef, from: K, to: K, ) -> Result<(), Error> { self.delete_range_cf_opt(cf, from, to, &WriteOptions::default()) } } rocksdb-0.23.0/src/transactions/options.rs000064400000000000000000000237021046102023000167010ustar 00000000000000// Copyright 2021 Yiyuan Liu // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // use crate::ffi; pub struct TransactionOptions { pub(crate) inner: *mut ffi::rocksdb_transaction_options_t, } unsafe impl Send for TransactionOptions {} unsafe impl Sync for TransactionOptions {} impl Default for TransactionOptions { fn default() -> Self { let txn_opts = unsafe { ffi::rocksdb_transaction_options_create() }; assert!( !txn_opts.is_null(), "Could not create RocksDB transaction options" ); Self { inner: txn_opts } } } impl TransactionOptions { pub fn new() -> TransactionOptions { TransactionOptions::default() } pub fn set_skip_prepare(&mut self, skip_prepare: bool) { unsafe { ffi::rocksdb_transaction_options_set_set_snapshot(self.inner, u8::from(skip_prepare)); } } /// Specifies use snapshot or not. /// /// Default: false. /// /// If a transaction has a snapshot set, the transaction will ensure that /// any keys successfully written(or fetched via `get_for_update`) have not /// been modified outside this transaction since the time the snapshot was /// set. /// If a snapshot has not been set, the transaction guarantees that keys have /// not been modified since the time each key was first written (or fetched via /// `get_for_update`). /// /// Using snapshot will provide stricter isolation guarantees at the /// expense of potentially more transaction failures due to conflicts with /// other writes. /// /// Calling `set_snapshot` will not affect the version of Data returned by `get` /// methods. pub fn set_snapshot(&mut self, snapshot: bool) { unsafe { ffi::rocksdb_transaction_options_set_set_snapshot(self.inner, u8::from(snapshot)); } } /// Specifies whether detect deadlock or not. /// /// Setting to true means that before acquiring locks, this transaction will /// check if doing so will cause a deadlock. If so, it will return with /// Status::Busy. The user should retry their transaction. /// /// Default: false. pub fn set_deadlock_detect(&mut self, deadlock_detect: bool) { unsafe { ffi::rocksdb_transaction_options_set_deadlock_detect( self.inner, u8::from(deadlock_detect), ); } } /// Specifies the wait timeout in milliseconds when a transaction attempts to lock a key. /// /// If 0, no waiting is done if a lock cannot instantly be acquired. /// If negative, transaction lock timeout in `TransactionDBOptions` will be used. /// /// Default: -1. pub fn set_lock_timeout(&mut self, lock_timeout: i64) { unsafe { ffi::rocksdb_transaction_options_set_lock_timeout(self.inner, lock_timeout); } } /// Specifies expiration duration in milliseconds. /// /// If non-negative, transactions that last longer than this many milliseconds will fail to commit. /// If not set, a forgotten transaction that is never committed, rolled back, or deleted /// will never relinquish any locks it holds. This could prevent keys from being by other writers. /// /// Default: -1. pub fn set_expiration(&mut self, expiration: i64) { unsafe { ffi::rocksdb_transaction_options_set_expiration(self.inner, expiration); } } /// Specifies the number of traversals to make during deadlock detection. /// /// Default: 50. pub fn set_deadlock_detect_depth(&mut self, depth: i64) { unsafe { ffi::rocksdb_transaction_options_set_deadlock_detect_depth(self.inner, depth); } } /// Specifies the maximum number of bytes used for the write batch. 0 means no limit. /// /// Default: 0. pub fn set_max_write_batch_size(&mut self, size: usize) { unsafe { ffi::rocksdb_transaction_options_set_max_write_batch_size(self.inner, size); } } } impl Drop for TransactionOptions { fn drop(&mut self) { unsafe { ffi::rocksdb_transaction_options_destroy(self.inner); } } } pub struct TransactionDBOptions { pub(crate) inner: *mut ffi::rocksdb_transactiondb_options_t, } unsafe impl Send for TransactionDBOptions {} unsafe impl Sync for TransactionDBOptions {} impl Default for TransactionDBOptions { fn default() -> Self { let txn_db_opts = unsafe { ffi::rocksdb_transactiondb_options_create() }; assert!( !txn_db_opts.is_null(), "Could not create RocksDB transaction_db options" ); Self { inner: txn_db_opts } } } impl TransactionDBOptions { pub fn new() -> TransactionDBOptions { TransactionDBOptions::default() } /// Specifies the wait timeout in milliseconds when writing a key /// outside a transaction (i.e. by calling `TransactionDB::put` directly). /// /// If 0, no waiting is done if a lock cannot instantly be acquired. /// If negative, there is no timeout and will block indefinitely when acquiring /// a lock. /// /// Not using a timeout can lead to deadlocks. Currently, there /// is no deadlock-detection to recover from a deadlock. While DB writes /// cannot deadlock with other DB writes, they can deadlock with a transaction. /// A negative timeout should only be used if all transactions have a small /// expiration set. /// /// Default: 1000(1s). pub fn set_default_lock_timeout(&mut self, default_lock_timeout: i64) { unsafe { ffi::rocksdb_transactiondb_options_set_default_lock_timeout( self.inner, default_lock_timeout, ); } } /// Specifies the default wait timeout in milliseconds when a transaction /// attempts to lock a key if not specified in `TransactionOptions`. /// /// If 0, no waiting is done if a lock cannot instantly be acquired. /// If negative, there is no timeout. Not using a timeout is not recommended /// as it can lead to deadlocks. Currently, there is no deadlock-detection to /// recover from a deadlock. /// /// Default: 1000(1s). pub fn set_txn_lock_timeout(&mut self, txn_lock_timeout: i64) { unsafe { ffi::rocksdb_transactiondb_options_set_transaction_lock_timeout( self.inner, txn_lock_timeout, ); } } /// Specifies the maximum number of keys that can be locked at the same time /// per column family. /// /// If the number of locked keys is greater than `max_num_locks`, transaction /// `writes` (or `get_for_update`) will return an error. /// If this value is not positive, no limit will be enforced. /// /// Default: -1. pub fn set_max_num_locks(&mut self, max_num_locks: i64) { unsafe { ffi::rocksdb_transactiondb_options_set_max_num_locks(self.inner, max_num_locks); } } /// Specifies lock table stripes count. /// /// Increasing this value will increase the concurrency by dividing the lock /// table (per column family) into more sub-tables, each with their own /// separate mutex. /// /// Default: 16. pub fn set_num_stripes(&mut self, num_stripes: usize) { unsafe { ffi::rocksdb_transactiondb_options_set_num_stripes(self.inner, num_stripes); } } } impl Drop for TransactionDBOptions { fn drop(&mut self) { unsafe { ffi::rocksdb_transactiondb_options_destroy(self.inner); } } } pub struct OptimisticTransactionOptions { pub(crate) inner: *mut ffi::rocksdb_optimistictransaction_options_t, } unsafe impl Send for OptimisticTransactionOptions {} unsafe impl Sync for OptimisticTransactionOptions {} impl Default for OptimisticTransactionOptions { fn default() -> Self { let txn_opts = unsafe { ffi::rocksdb_optimistictransaction_options_create() }; assert!( !txn_opts.is_null(), "Could not create RocksDB optimistic transaction options" ); Self { inner: txn_opts } } } impl OptimisticTransactionOptions { pub fn new() -> OptimisticTransactionOptions { OptimisticTransactionOptions::default() } /// Specifies use snapshot or not. /// /// Default: false. /// /// If a transaction has a snapshot set, the transaction will ensure that /// any keys successfully written(or fetched via `get_for_update`) have not /// been modified outside the transaction since the time the snapshot was /// set. /// If a snapshot has not been set, the transaction guarantees that keys have /// not been modified since the time each key was first written (or fetched via /// `get_for_update`). /// /// Using snapshot will provide stricter isolation guarantees at the /// expense of potentially more transaction failures due to conflicts with /// other writes. /// /// Calling `set_snapshot` will not affect the version of Data returned by `get` /// methods. pub fn set_snapshot(&mut self, snapshot: bool) { unsafe { ffi::rocksdb_optimistictransaction_options_set_set_snapshot( self.inner, u8::from(snapshot), ); } } } impl Drop for OptimisticTransactionOptions { fn drop(&mut self) { unsafe { ffi::rocksdb_optimistictransaction_options_destroy(self.inner); } } } rocksdb-0.23.0/src/transactions/transaction.rs000064400000000000000000000744371046102023000175460ustar 00000000000000// Copyright 2021 Yiyuan Liu // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // use std::{marker::PhantomData, ptr}; use crate::{ db::{convert_values, DBAccess}, ffi, AsColumnFamilyRef, DBIteratorWithThreadMode, DBPinnableSlice, DBRawIteratorWithThreadMode, Direction, Error, IteratorMode, ReadOptions, SnapshotWithThreadMode, WriteBatchWithTransaction, }; use libc::{c_char, c_void, size_t}; /// RocksDB Transaction. /// /// To use transactions, you must first create a [`TransactionDB`] or [`OptimisticTransactionDB`]. /// /// [`TransactionDB`]: crate::TransactionDB /// [`OptimisticTransactionDB`]: crate::OptimisticTransactionDB pub struct Transaction<'db, DB> { pub(crate) inner: *mut ffi::rocksdb_transaction_t, pub(crate) _marker: PhantomData<&'db DB>, } unsafe impl<'db, DB> Send for Transaction<'db, DB> {} impl<'db, DB> DBAccess for Transaction<'db, DB> { unsafe fn create_snapshot(&self) -> *const ffi::rocksdb_snapshot_t { ffi::rocksdb_transaction_get_snapshot(self.inner) } unsafe fn release_snapshot(&self, snapshot: *const ffi::rocksdb_snapshot_t) { ffi::rocksdb_free(snapshot as *mut c_void); } unsafe fn create_iterator(&self, readopts: &ReadOptions) -> *mut ffi::rocksdb_iterator_t { ffi::rocksdb_transaction_create_iterator(self.inner, readopts.inner) } unsafe fn create_iterator_cf( &self, cf_handle: *mut ffi::rocksdb_column_family_handle_t, readopts: &ReadOptions, ) -> *mut ffi::rocksdb_iterator_t { ffi::rocksdb_transaction_create_iterator_cf(self.inner, readopts.inner, cf_handle) } fn get_opt>( &self, key: K, readopts: &ReadOptions, ) -> Result>, Error> { self.get_opt(key, readopts) } fn get_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, readopts: &ReadOptions, ) -> Result>, Error> { self.get_cf_opt(cf, key, readopts) } fn get_pinned_opt>( &self, key: K, readopts: &ReadOptions, ) -> Result, Error> { self.get_pinned_opt(key, readopts) } fn get_pinned_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, readopts: &ReadOptions, ) -> Result, Error> { self.get_pinned_cf_opt(cf, key, readopts) } fn multi_get_opt( &self, keys: I, readopts: &ReadOptions, ) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, { self.multi_get_opt(keys, readopts) } fn multi_get_cf_opt<'b, K, I, W>( &self, keys_cf: I, readopts: &ReadOptions, ) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, W: AsColumnFamilyRef + 'b, { self.multi_get_cf_opt(keys_cf, readopts) } } impl<'db, DB> Transaction<'db, DB> { /// Write all batched keys to the DB atomically. /// /// May return any error that could be returned by `DB::write`. /// /// If this transaction was created by a [`TransactionDB`], an error of /// the [`Expired`] kind may be returned if this transaction has /// lived longer than expiration time in [`TransactionOptions`]. /// /// If this transaction was created by an [`OptimisticTransactionDB`], an error of /// the [`Busy`] kind may be returned if the transaction /// could not guarantee that there are no write conflicts. /// An error of the [`TryAgain`] kind may be returned if the memtable /// history size is not large enough (see [`Options::set_max_write_buffer_size_to_maintain`]). /// /// [`Expired`]: crate::ErrorKind::Expired /// [`TransactionOptions`]: crate::TransactionOptions /// [`TransactionDB`]: crate::TransactionDB /// [`OptimisticTransactionDB`]: crate::OptimisticTransactionDB /// [`Busy`]: crate::ErrorKind::Busy /// [`TryAgain`]: crate::ErrorKind::TryAgain /// [`Options::set_max_write_buffer_size_to_maintain`]: crate::Options::set_max_write_buffer_size_to_maintain pub fn commit(self) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_transaction_commit(self.inner)); } Ok(()) } pub fn set_name(&self, name: &[u8]) -> Result<(), Error> { let ptr = name.as_ptr(); let len = name.len(); unsafe { ffi_try!(ffi::rocksdb_transaction_set_name( self.inner, ptr as _, len as _ )); } Ok(()) } pub fn get_name(&self) -> Option> { unsafe { let mut name_len = 0; let name = ffi::rocksdb_transaction_get_name(self.inner, &mut name_len); if name.is_null() { None } else { let mut vec = vec![0; name_len]; std::ptr::copy_nonoverlapping(name as *mut u8, vec.as_mut_ptr(), name_len); ffi::rocksdb_free(name as *mut c_void); Some(vec) } } } pub fn prepare(&self) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_transaction_prepare(self.inner)); } Ok(()) } /// Returns snapshot associated with transaction if snapshot was enabled in [`TransactionOptions`]. /// Otherwise, returns a snapshot with `nullptr` inside which doesn't affect read operations. /// /// [`TransactionOptions`]: crate::TransactionOptions pub fn snapshot(&self) -> SnapshotWithThreadMode { SnapshotWithThreadMode::new(self) } /// Discard all batched writes in this transaction. pub fn rollback(&self) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_transaction_rollback(self.inner)); Ok(()) } } /// Record the state of the transaction for future calls to [`rollback_to_savepoint`]. /// May be called multiple times to set multiple save points. /// /// [`rollback_to_savepoint`]: Self::rollback_to_savepoint pub fn set_savepoint(&self) { unsafe { ffi::rocksdb_transaction_set_savepoint(self.inner); } } /// Undo all operations in this transaction since the most recent call to [`set_savepoint`] /// and removes the most recent [`set_savepoint`]. /// /// Returns error if there is no previous call to [`set_savepoint`]. /// /// [`set_savepoint`]: Self::set_savepoint pub fn rollback_to_savepoint(&self) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_transaction_rollback_to_savepoint(self.inner)); Ok(()) } } /// Get the bytes associated with a key value. /// /// See [`get_cf_opt`] for details. /// /// [`get_cf_opt`]: Self::get_cf_opt pub fn get>(&self, key: K) -> Result>, Error> { self.get_opt(key, &ReadOptions::default()) } pub fn get_pinned>(&self, key: K) -> Result, Error> { self.get_pinned_opt(key, &ReadOptions::default()) } /// Get the bytes associated with a key value and the given column family. /// /// See [`get_cf_opt`] for details. /// /// [`get_cf_opt`]: Self::get_cf_opt pub fn get_cf>( &self, cf: &impl AsColumnFamilyRef, key: K, ) -> Result>, Error> { self.get_cf_opt(cf, key, &ReadOptions::default()) } pub fn get_pinned_cf>( &self, cf: &impl AsColumnFamilyRef, key: K, ) -> Result, Error> { self.get_pinned_cf_opt(cf, key, &ReadOptions::default()) } /// Get the key and ensure that this transaction will only /// be able to be committed if this key is not written outside this /// transaction after it has first been read (or after the snapshot if a /// snapshot is set in this transaction). /// /// See [`get_for_update_cf_opt`] for details. /// /// [`get_for_update_cf_opt`]: Self::get_for_update_cf_opt pub fn get_for_update>( &self, key: K, exclusive: bool, ) -> Result>, Error> { self.get_for_update_opt(key, exclusive, &ReadOptions::default()) } pub fn get_pinned_for_update>( &self, key: K, exclusive: bool, ) -> Result, Error> { self.get_pinned_for_update_opt(key, exclusive, &ReadOptions::default()) } /// Get the key in the given column family and ensure that this transaction will only /// be able to be committed if this key is not written outside this /// transaction after it has first been read (or after the snapshot if a /// snapshot is set in this transaction). /// /// See [`get_for_update_cf_opt`] for details. /// /// [`get_for_update_cf_opt`]: Self::get_for_update_cf_opt pub fn get_for_update_cf>( &self, cf: &impl AsColumnFamilyRef, key: K, exclusive: bool, ) -> Result>, Error> { self.get_for_update_cf_opt(cf, key, exclusive, &ReadOptions::default()) } pub fn get_pinned_for_update_cf>( &self, cf: &impl AsColumnFamilyRef, key: K, exclusive: bool, ) -> Result, Error> { self.get_pinned_for_update_cf_opt(cf, key, exclusive, &ReadOptions::default()) } /// Returns the bytes associated with a key value with read options. /// /// See [`get_cf_opt`] for details. /// /// [`get_cf_opt`]: Self::get_cf_opt pub fn get_opt>( &self, key: K, readopts: &ReadOptions, ) -> Result>, Error> { self.get_pinned_opt(key, readopts) .map(|x| x.map(|v| v.as_ref().to_vec())) } pub fn get_pinned_opt>( &self, key: K, readopts: &ReadOptions, ) -> Result, Error> { let key = key.as_ref(); unsafe { let val = ffi_try!(ffi::rocksdb_transaction_get_pinned( self.inner, readopts.inner, key.as_ptr() as *const c_char, key.len(), )); if val.is_null() { Ok(None) } else { Ok(Some(DBPinnableSlice::from_c(val))) } } } /// Get the bytes associated with a key value and the given column family with read options. /// /// This function will also read pending changes in this transaction. /// Currently, this function will return an error of the [`MergeInProgress`] kind /// if the most recent write to the queried key in this batch is a Merge. /// /// [`MergeInProgress`]: crate::ErrorKind::MergeInProgress pub fn get_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, readopts: &ReadOptions, ) -> Result>, Error> { self.get_pinned_cf_opt(cf, key, readopts) .map(|x| x.map(|v| v.as_ref().to_vec())) } pub fn get_pinned_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, readopts: &ReadOptions, ) -> Result, Error> { let key = key.as_ref(); unsafe { let val = ffi_try!(ffi::rocksdb_transaction_get_pinned_cf( self.inner, readopts.inner, cf.inner(), key.as_ptr() as *const c_char, key.len(), )); if val.is_null() { Ok(None) } else { Ok(Some(DBPinnableSlice::from_c(val))) } } } /// Get the key with read options and ensure that this transaction will only /// be able to be committed if this key is not written outside this /// transaction after it has first been read (or after the snapshot if a /// snapshot is set in this transaction). /// /// See [`get_for_update_cf_opt`] for details. /// /// [`get_for_update_cf_opt`]: Self::get_for_update_cf_opt pub fn get_for_update_opt>( &self, key: K, exclusive: bool, opts: &ReadOptions, ) -> Result>, Error> { self.get_pinned_for_update_opt(key, exclusive, opts) .map(|x| x.map(|v| v.as_ref().to_vec())) } pub fn get_pinned_for_update_opt>( &self, key: K, exclusive: bool, opts: &ReadOptions, ) -> Result, Error> { let key = key.as_ref(); unsafe { let val = ffi_try!(ffi::rocksdb_transaction_get_pinned_for_update( self.inner, opts.inner, key.as_ptr() as *const c_char, key.len() as size_t, u8::from(exclusive), )); if val.is_null() { Ok(None) } else { Ok(Some(DBPinnableSlice::from_c(val))) } } } /// Get the key in the given column family with read options /// and ensure that this transaction will only /// be able to be committed if this key is not written outside this /// transaction after it has first been read (or after the snapshot if a /// snapshot is set in this transaction). /// /// Currently, this function will return an error of the [`MergeInProgress`] /// if the most recent write to the queried key in this batch is a Merge. /// /// If this transaction was created by a [`TransactionDB`], it can return error of kind: /// * [`Busy`] if there is a write conflict. /// * [`TimedOut`] if a lock could not be acquired. /// * [`TryAgain`] if the memtable history size is not large enough. /// * [`MergeInProgress`] if merge operations cannot be resolved. /// * or other errors if this key could not be read. /// /// If this transaction was created by an `[OptimisticTransactionDB]`, `get_for_update_opt` /// can cause [`commit`] to fail. Otherwise, it could return any error that could /// be returned by `[DB::get]`. /// /// [`Busy`]: crate::ErrorKind::Busy /// [`TimedOut`]: crate::ErrorKind::TimedOut /// [`TryAgain`]: crate::ErrorKind::TryAgain /// [`MergeInProgress`]: crate::ErrorKind::MergeInProgress /// [`TransactionDB`]: crate::TransactionDB /// [`OptimisticTransactionDB`]: crate::OptimisticTransactionDB /// [`commit`]: Self::commit /// [`DB::get`]: crate::DB::get pub fn get_for_update_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, exclusive: bool, opts: &ReadOptions, ) -> Result>, Error> { self.get_pinned_for_update_cf_opt(cf, key, exclusive, opts) .map(|x| x.map(|v| v.as_ref().to_vec())) } pub fn get_pinned_for_update_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, exclusive: bool, opts: &ReadOptions, ) -> Result, Error> { let key = key.as_ref(); unsafe { let val = ffi_try!(ffi::rocksdb_transaction_get_pinned_for_update_cf( self.inner, opts.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, u8::from(exclusive), )); if val.is_null() { Ok(None) } else { Ok(Some(DBPinnableSlice::from_c(val))) } } } /// Return the values associated with the given keys. pub fn multi_get(&self, keys: I) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, { self.multi_get_opt(keys, &ReadOptions::default()) } /// Return the values associated with the given keys using read options. pub fn multi_get_opt( &self, keys: I, readopts: &ReadOptions, ) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, { let (keys, keys_sizes): (Vec>, Vec<_>) = keys .into_iter() .map(|key| { let key = key.as_ref(); (Box::from(key), key.len()) }) .unzip(); let ptr_keys: Vec<_> = keys.iter().map(|k| k.as_ptr() as *const c_char).collect(); let mut values = vec![ptr::null_mut(); keys.len()]; let mut values_sizes = vec![0_usize; keys.len()]; let mut errors = vec![ptr::null_mut(); keys.len()]; unsafe { ffi::rocksdb_transaction_multi_get( self.inner, readopts.inner, ptr_keys.len(), ptr_keys.as_ptr(), keys_sizes.as_ptr(), values.as_mut_ptr(), values_sizes.as_mut_ptr(), errors.as_mut_ptr(), ); } convert_values(values, values_sizes, errors) } /// Return the values associated with the given keys and column families. pub fn multi_get_cf<'a, 'b: 'a, K, I, W>( &'a self, keys: I, ) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, W: 'b + AsColumnFamilyRef, { self.multi_get_cf_opt(keys, &ReadOptions::default()) } /// Return the values associated with the given keys and column families using read options. pub fn multi_get_cf_opt<'a, 'b: 'a, K, I, W>( &'a self, keys: I, readopts: &ReadOptions, ) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, W: 'b + AsColumnFamilyRef, { let (cfs_and_keys, keys_sizes): (Vec<(_, Box<[u8]>)>, Vec<_>) = keys .into_iter() .map(|(cf, key)| { let key = key.as_ref(); ((cf, Box::from(key)), key.len()) }) .unzip(); let ptr_keys: Vec<_> = cfs_and_keys .iter() .map(|(_, k)| k.as_ptr() as *const c_char) .collect(); let ptr_cfs: Vec<_> = cfs_and_keys .iter() .map(|(c, _)| c.inner().cast_const()) .collect(); let mut values = vec![ptr::null_mut(); ptr_keys.len()]; let mut values_sizes = vec![0_usize; ptr_keys.len()]; let mut errors = vec![ptr::null_mut(); ptr_keys.len()]; unsafe { ffi::rocksdb_transaction_multi_get_cf( self.inner, readopts.inner, ptr_cfs.as_ptr(), ptr_keys.len(), ptr_keys.as_ptr(), keys_sizes.as_ptr(), values.as_mut_ptr(), values_sizes.as_mut_ptr(), errors.as_mut_ptr(), ); } convert_values(values, values_sizes, errors) } /// Put the key value in default column family and do conflict checking on the key. /// /// See [`put_cf`] for details. /// /// [`put_cf`]: Self::put_cf pub fn put, V: AsRef<[u8]>>(&self, key: K, value: V) -> Result<(), Error> { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi_try!(ffi::rocksdb_transaction_put( self.inner, key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, )); Ok(()) } } /// Put the key value in the given column family and do conflict checking on the key. /// /// If this transaction was created by a [`TransactionDB`], it can return error of kind: /// * [`Busy`] if there is a write conflict. /// * [`TimedOut`] if a lock could not be acquired. /// * [`TryAgain`] if the memtable history size is not large enough. /// * [`MergeInProgress`] if merge operations cannot be resolved. /// * or other errors on unexpected failures. /// /// [`Busy`]: crate::ErrorKind::Busy /// [`TimedOut`]: crate::ErrorKind::TimedOut /// [`TryAgain`]: crate::ErrorKind::TryAgain /// [`MergeInProgress`]: crate::ErrorKind::MergeInProgress /// [`TransactionDB`]: crate::TransactionDB /// [`OptimisticTransactionDB`]: crate::OptimisticTransactionDB pub fn put_cf, V: AsRef<[u8]>>( &self, cf: &impl AsColumnFamilyRef, key: K, value: V, ) -> Result<(), Error> { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi_try!(ffi::rocksdb_transaction_put_cf( self.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, )); Ok(()) } } /// Merge value with existing value of key, and also do conflict checking on the key. /// /// See [`merge_cf`] for details. /// /// [`merge_cf`]: Self::merge_cf pub fn merge, V: AsRef<[u8]>>(&self, key: K, value: V) -> Result<(), Error> { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi_try!(ffi::rocksdb_transaction_merge( self.inner, key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t )); Ok(()) } } /// Merge `value` with existing value of `key` in the given column family, /// and also do conflict checking on the key. /// /// If this transaction was created by a [`TransactionDB`], it can return error of kind: /// * [`Busy`] if there is a write conflict. /// * [`TimedOut`] if a lock could not be acquired. /// * [`TryAgain`] if the memtable history size is not large enough. /// * [`MergeInProgress`] if merge operations cannot be resolved. /// * or other errors on unexpected failures. /// /// [`Busy`]: crate::ErrorKind::Busy /// [`TimedOut`]: crate::ErrorKind::TimedOut /// [`TryAgain`]: crate::ErrorKind::TryAgain /// [`MergeInProgress`]: crate::ErrorKind::MergeInProgress /// [`TransactionDB`]: crate::TransactionDB pub fn merge_cf, V: AsRef<[u8]>>( &self, cf: &impl AsColumnFamilyRef, key: K, value: V, ) -> Result<(), Error> { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi_try!(ffi::rocksdb_transaction_merge_cf( self.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t )); Ok(()) } } /// Delete the key value if it exists and do conflict checking on the key. /// /// See [`delete_cf`] for details. /// /// [`delete_cf`]: Self::delete_cf pub fn delete>(&self, key: K) -> Result<(), Error> { let key = key.as_ref(); unsafe { ffi_try!(ffi::rocksdb_transaction_delete( self.inner, key.as_ptr() as *const c_char, key.len() as size_t )); } Ok(()) } /// Delete the key value in the given column family and do conflict checking. /// /// If this transaction was created by a [`TransactionDB`], it can return error of kind: /// * [`Busy`] if there is a write conflict. /// * [`TimedOut`] if a lock could not be acquired. /// * [`TryAgain`] if the memtable history size is not large enough. /// * [`MergeInProgress`] if merge operations cannot be resolved. /// * or other errors on unexpected failures. /// /// [`Busy`]: crate::ErrorKind::Busy /// [`TimedOut`]: crate::ErrorKind::TimedOut /// [`TryAgain`]: crate::ErrorKind::TryAgain /// [`MergeInProgress`]: crate::ErrorKind::MergeInProgress /// [`TransactionDB`]: crate::TransactionDB pub fn delete_cf>( &self, cf: &impl AsColumnFamilyRef, key: K, ) -> Result<(), Error> { let key = key.as_ref(); unsafe { ffi_try!(ffi::rocksdb_transaction_delete_cf( self.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t )); } Ok(()) } pub fn iterator<'a: 'b, 'b>( &'a self, mode: IteratorMode, ) -> DBIteratorWithThreadMode<'b, Self> { let readopts = ReadOptions::default(); self.iterator_opt(mode, readopts) } pub fn iterator_opt<'a: 'b, 'b>( &'a self, mode: IteratorMode, readopts: ReadOptions, ) -> DBIteratorWithThreadMode<'b, Self> { DBIteratorWithThreadMode::new(self, readopts, mode) } /// Opens an iterator using the provided ReadOptions. /// This is used when you want to iterate over a specific ColumnFamily with a modified ReadOptions. pub fn iterator_cf_opt<'a: 'b, 'b>( &'a self, cf_handle: &impl AsColumnFamilyRef, readopts: ReadOptions, mode: IteratorMode, ) -> DBIteratorWithThreadMode<'b, Self> { DBIteratorWithThreadMode::new_cf(self, cf_handle.inner(), readopts, mode) } /// Opens an iterator with `set_total_order_seek` enabled. /// This must be used to iterate across prefixes when `set_memtable_factory` has been called /// with a Hash-based implementation. pub fn full_iterator<'a: 'b, 'b>( &'a self, mode: IteratorMode, ) -> DBIteratorWithThreadMode<'b, Self> { let mut opts = ReadOptions::default(); opts.set_total_order_seek(true); DBIteratorWithThreadMode::new(self, opts, mode) } pub fn prefix_iterator<'a: 'b, 'b, P: AsRef<[u8]>>( &'a self, prefix: P, ) -> DBIteratorWithThreadMode<'b, Self> { let mut opts = ReadOptions::default(); opts.set_prefix_same_as_start(true); DBIteratorWithThreadMode::new( self, opts, IteratorMode::From(prefix.as_ref(), Direction::Forward), ) } pub fn iterator_cf<'a: 'b, 'b>( &'a self, cf_handle: &impl AsColumnFamilyRef, mode: IteratorMode, ) -> DBIteratorWithThreadMode<'b, Self> { let opts = ReadOptions::default(); DBIteratorWithThreadMode::new_cf(self, cf_handle.inner(), opts, mode) } pub fn full_iterator_cf<'a: 'b, 'b>( &'a self, cf_handle: &impl AsColumnFamilyRef, mode: IteratorMode, ) -> DBIteratorWithThreadMode<'b, Self> { let mut opts = ReadOptions::default(); opts.set_total_order_seek(true); DBIteratorWithThreadMode::new_cf(self, cf_handle.inner(), opts, mode) } pub fn prefix_iterator_cf<'a, P: AsRef<[u8]>>( &'a self, cf_handle: &impl AsColumnFamilyRef, prefix: P, ) -> DBIteratorWithThreadMode<'a, Self> { let mut opts = ReadOptions::default(); opts.set_prefix_same_as_start(true); DBIteratorWithThreadMode::<'a, Self>::new_cf( self, cf_handle.inner(), opts, IteratorMode::From(prefix.as_ref(), Direction::Forward), ) } /// Opens a raw iterator over the database, using the default read options pub fn raw_iterator<'a: 'b, 'b>(&'a self) -> DBRawIteratorWithThreadMode<'b, Self> { let opts = ReadOptions::default(); DBRawIteratorWithThreadMode::new(self, opts) } /// Opens a raw iterator over the given column family, using the default read options pub fn raw_iterator_cf<'a: 'b, 'b>( &'a self, cf_handle: &impl AsColumnFamilyRef, ) -> DBRawIteratorWithThreadMode<'b, Self> { let opts = ReadOptions::default(); DBRawIteratorWithThreadMode::new_cf(self, cf_handle.inner(), opts) } /// Opens a raw iterator over the database, using the given read options pub fn raw_iterator_opt<'a: 'b, 'b>( &'a self, readopts: ReadOptions, ) -> DBRawIteratorWithThreadMode<'b, Self> { DBRawIteratorWithThreadMode::new(self, readopts) } /// Opens a raw iterator over the given column family, using the given read options pub fn raw_iterator_cf_opt<'a: 'b, 'b>( &'a self, cf_handle: &impl AsColumnFamilyRef, readopts: ReadOptions, ) -> DBRawIteratorWithThreadMode<'b, Self> { DBRawIteratorWithThreadMode::new_cf(self, cf_handle.inner(), readopts) } pub fn get_writebatch(&self) -> WriteBatchWithTransaction { unsafe { let wi = ffi::rocksdb_transaction_get_writebatch_wi(self.inner); let mut len: usize = 0; let ptr = ffi::rocksdb_writebatch_wi_data(wi, &mut len as _); let writebatch = ffi::rocksdb_writebatch_create_from(ptr, len); ffi::rocksdb_free(wi as *mut c_void); WriteBatchWithTransaction { inner: writebatch } } } pub fn rebuild_from_writebatch( &self, writebatch: &WriteBatchWithTransaction, ) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_transaction_rebuild_from_writebatch( self.inner, writebatch.inner )); } Ok(()) } } impl<'db, DB> Drop for Transaction<'db, DB> { fn drop(&mut self) { unsafe { ffi::rocksdb_transaction_destroy(self.inner); } } } rocksdb-0.23.0/src/transactions/transaction_db.rs000064400000000000000000001061441046102023000202020ustar 00000000000000// Copyright 2021 Yiyuan Liu // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // use std::{ collections::BTreeMap, ffi::CString, fs, iter, marker::PhantomData, path::{Path, PathBuf}, ptr, sync::{Arc, Mutex}, }; use crate::CStrLike; use std::ffi::CStr; use crate::column_family::ColumnFamilyTtl; use crate::{ column_family::UnboundColumnFamily, db::{convert_values, DBAccess}, db_options::OptionsMustOutliveDB, ffi, ffi_util::to_cpath, AsColumnFamilyRef, BoundColumnFamily, ColumnFamily, ColumnFamilyDescriptor, DBIteratorWithThreadMode, DBPinnableSlice, DBRawIteratorWithThreadMode, Direction, Error, IteratorMode, MultiThreaded, Options, ReadOptions, SingleThreaded, SnapshotWithThreadMode, ThreadMode, Transaction, TransactionDBOptions, TransactionOptions, WriteBatchWithTransaction, WriteOptions, DB, DEFAULT_COLUMN_FAMILY_NAME, }; use ffi::rocksdb_transaction_t; use libc::{c_char, c_int, c_void, size_t}; #[cfg(not(feature = "multi-threaded-cf"))] type DefaultThreadMode = crate::SingleThreaded; #[cfg(feature = "multi-threaded-cf")] type DefaultThreadMode = crate::MultiThreaded; /// RocksDB TransactionDB. /// /// Please read the official [guide](https://github.com/facebook/rocksdb/wiki/Transactions) /// to learn more about RocksDB TransactionDB. /// /// The default thread mode for [`TransactionDB`] is [`SingleThreaded`] /// if feature `multi-threaded-cf` is not enabled. /// /// ``` /// use rocksdb::{DB, Options, TransactionDB, SingleThreaded}; /// let tempdir = tempfile::Builder::new() /// .prefix("_path_for_transaction_db") /// .tempdir() /// .expect("Failed to create temporary path for the _path_for_transaction_db"); /// let path = tempdir.path(); /// { /// let db: TransactionDB = TransactionDB::open_default(path).unwrap(); /// db.put(b"my key", b"my value").unwrap(); /// /// // create transaction /// let txn = db.transaction(); /// txn.put(b"key2", b"value2"); /// txn.put(b"key3", b"value3"); /// txn.commit().unwrap(); /// } /// let _ = DB::destroy(&Options::default(), path); /// ``` /// /// [`SingleThreaded`]: crate::SingleThreaded pub struct TransactionDB { pub(crate) inner: *mut ffi::rocksdb_transactiondb_t, cfs: T, path: PathBuf, // prepared 2pc transactions. prepared: Mutex>, _outlive: Vec, } unsafe impl Send for TransactionDB {} unsafe impl Sync for TransactionDB {} impl DBAccess for TransactionDB { unsafe fn create_snapshot(&self) -> *const ffi::rocksdb_snapshot_t { ffi::rocksdb_transactiondb_create_snapshot(self.inner) } unsafe fn release_snapshot(&self, snapshot: *const ffi::rocksdb_snapshot_t) { ffi::rocksdb_transactiondb_release_snapshot(self.inner, snapshot); } unsafe fn create_iterator(&self, readopts: &ReadOptions) -> *mut ffi::rocksdb_iterator_t { ffi::rocksdb_transactiondb_create_iterator(self.inner, readopts.inner) } unsafe fn create_iterator_cf( &self, cf_handle: *mut ffi::rocksdb_column_family_handle_t, readopts: &ReadOptions, ) -> *mut ffi::rocksdb_iterator_t { ffi::rocksdb_transactiondb_create_iterator_cf(self.inner, readopts.inner, cf_handle) } fn get_opt>( &self, key: K, readopts: &ReadOptions, ) -> Result>, Error> { self.get_opt(key, readopts) } fn get_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, readopts: &ReadOptions, ) -> Result>, Error> { self.get_cf_opt(cf, key, readopts) } fn get_pinned_opt>( &self, key: K, readopts: &ReadOptions, ) -> Result, Error> { self.get_pinned_opt(key, readopts) } fn get_pinned_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, readopts: &ReadOptions, ) -> Result, Error> { self.get_pinned_cf_opt(cf, key, readopts) } fn multi_get_opt( &self, keys: I, readopts: &ReadOptions, ) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, { self.multi_get_opt(keys, readopts) } fn multi_get_cf_opt<'b, K, I, W>( &self, keys_cf: I, readopts: &ReadOptions, ) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, W: AsColumnFamilyRef + 'b, { self.multi_get_cf_opt(keys_cf, readopts) } } impl TransactionDB { /// Opens a database with default options. pub fn open_default>(path: P) -> Result { let mut opts = Options::default(); opts.create_if_missing(true); let txn_db_opts = TransactionDBOptions::default(); Self::open(&opts, &txn_db_opts, path) } /// Opens the database with the specified options. pub fn open>( opts: &Options, txn_db_opts: &TransactionDBOptions, path: P, ) -> Result { Self::open_cf(opts, txn_db_opts, path, None::<&str>) } /// Opens a database with the given database options and column family names. /// /// Column families opened using this function will be created with default `Options`. pub fn open_cf( opts: &Options, txn_db_opts: &TransactionDBOptions, path: P, cfs: I, ) -> Result where P: AsRef, I: IntoIterator, N: AsRef, { let cfs = cfs .into_iter() .map(|name| ColumnFamilyDescriptor::new(name.as_ref(), Options::default())); Self::open_cf_descriptors_internal(opts, txn_db_opts, path, cfs) } /// Opens a database with the given database options and column family descriptors. pub fn open_cf_descriptors( opts: &Options, txn_db_opts: &TransactionDBOptions, path: P, cfs: I, ) -> Result where P: AsRef, I: IntoIterator, { Self::open_cf_descriptors_internal(opts, txn_db_opts, path, cfs) } /// Internal implementation for opening RocksDB. fn open_cf_descriptors_internal( opts: &Options, txn_db_opts: &TransactionDBOptions, path: P, cfs: I, ) -> Result where P: AsRef, I: IntoIterator, { let cfs: Vec<_> = cfs.into_iter().collect(); let outlive = iter::once(opts.outlive.clone()) .chain(cfs.iter().map(|cf| cf.options.outlive.clone())) .collect(); let cpath = to_cpath(&path)?; if let Err(e) = fs::create_dir_all(&path) { return Err(Error::new(format!( "Failed to create RocksDB directory: `{e:?}`." ))); } let db: *mut ffi::rocksdb_transactiondb_t; let mut cf_map = BTreeMap::new(); if cfs.is_empty() { db = Self::open_raw(opts, txn_db_opts, &cpath)?; } else { let mut cfs_v = cfs; // Always open the default column family. if !cfs_v.iter().any(|cf| cf.name == DEFAULT_COLUMN_FAMILY_NAME) { cfs_v.push(ColumnFamilyDescriptor { name: String::from(DEFAULT_COLUMN_FAMILY_NAME), options: Options::default(), ttl: ColumnFamilyTtl::SameAsDb, // it will have ttl specified in `DBWithThreadMode::open_with_ttl` }); } // We need to store our CStrings in an intermediate vector // so that their pointers remain valid. let c_cfs: Vec = cfs_v .iter() .map(|cf| CString::new(cf.name.as_bytes()).unwrap()) .collect(); let cfnames: Vec<_> = c_cfs.iter().map(|cf| cf.as_ptr()).collect(); // These handles will be populated by DB. let mut cfhandles: Vec<_> = cfs_v.iter().map(|_| ptr::null_mut()).collect(); let cfopts: Vec<_> = cfs_v .iter() .map(|cf| cf.options.inner.cast_const()) .collect(); db = Self::open_cf_raw( opts, txn_db_opts, &cpath, &cfs_v, &cfnames, &cfopts, &mut cfhandles, )?; for handle in &cfhandles { if handle.is_null() { return Err(Error::new( "Received null column family handle from DB.".to_owned(), )); } } for (cf_desc, inner) in cfs_v.iter().zip(cfhandles) { cf_map.insert(cf_desc.name.clone(), inner); } } if db.is_null() { return Err(Error::new("Could not initialize database.".to_owned())); } let prepared = unsafe { let mut cnt = 0; let ptr = ffi::rocksdb_transactiondb_get_prepared_transactions(db, &mut cnt); let mut vec = vec![std::ptr::null_mut(); cnt]; if !ptr.is_null() { std::ptr::copy_nonoverlapping(ptr, vec.as_mut_ptr(), cnt); ffi::rocksdb_free(ptr as *mut c_void); } vec }; Ok(TransactionDB { inner: db, cfs: T::new_cf_map_internal(cf_map), path: path.as_ref().to_path_buf(), prepared: Mutex::new(prepared), _outlive: outlive, }) } fn open_raw( opts: &Options, txn_db_opts: &TransactionDBOptions, cpath: &CString, ) -> Result<*mut ffi::rocksdb_transactiondb_t, Error> { unsafe { let db = ffi_try!(ffi::rocksdb_transactiondb_open( opts.inner, txn_db_opts.inner, cpath.as_ptr() )); Ok(db) } } fn open_cf_raw( opts: &Options, txn_db_opts: &TransactionDBOptions, cpath: &CString, cfs_v: &[ColumnFamilyDescriptor], cfnames: &[*const c_char], cfopts: &[*const ffi::rocksdb_options_t], cfhandles: &mut [*mut ffi::rocksdb_column_family_handle_t], ) -> Result<*mut ffi::rocksdb_transactiondb_t, Error> { unsafe { let db = ffi_try!(ffi::rocksdb_transactiondb_open_column_families( opts.inner, txn_db_opts.inner, cpath.as_ptr(), cfs_v.len() as c_int, cfnames.as_ptr(), cfopts.as_ptr(), cfhandles.as_mut_ptr(), )); Ok(db) } } fn create_inner_cf_handle( &self, name: &str, opts: &Options, ) -> Result<*mut ffi::rocksdb_column_family_handle_t, Error> { let cf_name = CString::new(name.as_bytes()).map_err(|_| { Error::new("Failed to convert path to CString when creating cf".to_owned()) })?; Ok(unsafe { ffi_try!(ffi::rocksdb_transactiondb_create_column_family( self.inner, opts.inner, cf_name.as_ptr(), )) }) } pub fn list_cf>(opts: &Options, path: P) -> Result, Error> { DB::list_cf(opts, path) } pub fn destroy>(opts: &Options, path: P) -> Result<(), Error> { DB::destroy(opts, path) } pub fn repair>(opts: &Options, path: P) -> Result<(), Error> { DB::repair(opts, path) } pub fn path(&self) -> &Path { self.path.as_path() } /// Creates a transaction with default options. pub fn transaction(&self) -> Transaction { self.transaction_opt(&WriteOptions::default(), &TransactionOptions::default()) } /// Creates a transaction with options. pub fn transaction_opt<'a>( &'a self, write_opts: &WriteOptions, txn_opts: &TransactionOptions, ) -> Transaction<'a, Self> { Transaction { inner: unsafe { ffi::rocksdb_transaction_begin( self.inner, write_opts.inner, txn_opts.inner, std::ptr::null_mut(), ) }, _marker: PhantomData, } } /// Get all prepared transactions for recovery. /// /// This function is expected to call once after open database. /// User should commit or rollback all transactions before start other transactions. pub fn prepared_transactions(&self) -> Vec> { self.prepared .lock() .unwrap() .drain(0..) .map(|inner| Transaction { inner, _marker: PhantomData, }) .collect() } /// Returns the bytes associated with a key value. pub fn get>(&self, key: K) -> Result>, Error> { self.get_pinned(key).map(|x| x.map(|v| v.as_ref().to_vec())) } /// Returns the bytes associated with a key value and the given column family. pub fn get_cf>( &self, cf: &impl AsColumnFamilyRef, key: K, ) -> Result>, Error> { self.get_pinned_cf(cf, key) .map(|x| x.map(|v| v.as_ref().to_vec())) } /// Returns the bytes associated with a key value with read options. pub fn get_opt>( &self, key: K, readopts: &ReadOptions, ) -> Result>, Error> { self.get_pinned_opt(key, readopts) .map(|x| x.map(|v| v.as_ref().to_vec())) } /// Returns the bytes associated with a key value and the given column family with read options. pub fn get_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, readopts: &ReadOptions, ) -> Result>, Error> { self.get_pinned_cf_opt(cf, key, readopts) .map(|x| x.map(|v| v.as_ref().to_vec())) } pub fn get_pinned>(&self, key: K) -> Result, Error> { self.get_pinned_opt(key, &ReadOptions::default()) } /// Returns the bytes associated with a key value and the given column family. pub fn get_pinned_cf>( &self, cf: &impl AsColumnFamilyRef, key: K, ) -> Result, Error> { self.get_pinned_cf_opt(cf, key, &ReadOptions::default()) } /// Returns the bytes associated with a key value with read options. pub fn get_pinned_opt>( &self, key: K, readopts: &ReadOptions, ) -> Result, Error> { let key = key.as_ref(); unsafe { let val = ffi_try!(ffi::rocksdb_transactiondb_get_pinned( self.inner, readopts.inner, key.as_ptr() as *const c_char, key.len() as size_t, )); if val.is_null() { Ok(None) } else { Ok(Some(DBPinnableSlice::from_c(val))) } } } /// Returns the bytes associated with a key value and the given column family with read options. pub fn get_pinned_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, readopts: &ReadOptions, ) -> Result, Error> { let key = key.as_ref(); unsafe { let val = ffi_try!(ffi::rocksdb_transactiondb_get_pinned_cf( self.inner, readopts.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, )); if val.is_null() { Ok(None) } else { Ok(Some(DBPinnableSlice::from_c(val))) } } } /// Return the values associated with the given keys. pub fn multi_get(&self, keys: I) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, { self.multi_get_opt(keys, &ReadOptions::default()) } /// Return the values associated with the given keys using read options. pub fn multi_get_opt( &self, keys: I, readopts: &ReadOptions, ) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, { let (keys, keys_sizes): (Vec>, Vec<_>) = keys .into_iter() .map(|key| { let key = key.as_ref(); (Box::from(key), key.len()) }) .unzip(); let ptr_keys: Vec<_> = keys.iter().map(|k| k.as_ptr() as *const c_char).collect(); let mut values = vec![ptr::null_mut(); keys.len()]; let mut values_sizes = vec![0_usize; keys.len()]; let mut errors = vec![ptr::null_mut(); keys.len()]; unsafe { ffi::rocksdb_transactiondb_multi_get( self.inner, readopts.inner, ptr_keys.len(), ptr_keys.as_ptr(), keys_sizes.as_ptr(), values.as_mut_ptr(), values_sizes.as_mut_ptr(), errors.as_mut_ptr(), ); } convert_values(values, values_sizes, errors) } /// Return the values associated with the given keys and column families. pub fn multi_get_cf<'a, 'b: 'a, K, I, W>( &'a self, keys: I, ) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, W: 'b + AsColumnFamilyRef, { self.multi_get_cf_opt(keys, &ReadOptions::default()) } /// Return the values associated with the given keys and column families using read options. pub fn multi_get_cf_opt<'a, 'b: 'a, K, I, W>( &'a self, keys: I, readopts: &ReadOptions, ) -> Vec>, Error>> where K: AsRef<[u8]>, I: IntoIterator, W: 'b + AsColumnFamilyRef, { let (cfs_and_keys, keys_sizes): (Vec<(_, Box<[u8]>)>, Vec<_>) = keys .into_iter() .map(|(cf, key)| { let key = key.as_ref(); ((cf, Box::from(key)), key.len()) }) .unzip(); let ptr_keys: Vec<_> = cfs_and_keys .iter() .map(|(_, k)| k.as_ptr() as *const c_char) .collect(); let ptr_cfs: Vec<_> = cfs_and_keys .iter() .map(|(c, _)| c.inner().cast_const()) .collect(); let mut values = vec![ptr::null_mut(); ptr_keys.len()]; let mut values_sizes = vec![0_usize; ptr_keys.len()]; let mut errors = vec![ptr::null_mut(); ptr_keys.len()]; unsafe { ffi::rocksdb_transactiondb_multi_get_cf( self.inner, readopts.inner, ptr_cfs.as_ptr(), ptr_keys.len(), ptr_keys.as_ptr(), keys_sizes.as_ptr(), values.as_mut_ptr(), values_sizes.as_mut_ptr(), errors.as_mut_ptr(), ); } convert_values(values, values_sizes, errors) } pub fn put(&self, key: K, value: V) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { self.put_opt(key, value, &WriteOptions::default()) } pub fn put_cf(&self, cf: &impl AsColumnFamilyRef, key: K, value: V) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { self.put_cf_opt(cf, key, value, &WriteOptions::default()) } pub fn put_opt(&self, key: K, value: V, writeopts: &WriteOptions) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi_try!(ffi::rocksdb_transactiondb_put( self.inner, writeopts.inner, key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t )); } Ok(()) } pub fn put_cf_opt( &self, cf: &impl AsColumnFamilyRef, key: K, value: V, writeopts: &WriteOptions, ) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi_try!(ffi::rocksdb_transactiondb_put_cf( self.inner, writeopts.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t )); } Ok(()) } pub fn write(&self, batch: WriteBatchWithTransaction) -> Result<(), Error> { self.write_opt(batch, &WriteOptions::default()) } pub fn write_opt( &self, batch: WriteBatchWithTransaction, writeopts: &WriteOptions, ) -> Result<(), Error> { unsafe { ffi_try!(ffi::rocksdb_transactiondb_write( self.inner, writeopts.inner, batch.inner )); } Ok(()) } pub fn merge(&self, key: K, value: V) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { self.merge_opt(key, value, &WriteOptions::default()) } pub fn merge_cf(&self, cf: &impl AsColumnFamilyRef, key: K, value: V) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { self.merge_cf_opt(cf, key, value, &WriteOptions::default()) } pub fn merge_opt(&self, key: K, value: V, writeopts: &WriteOptions) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi_try!(ffi::rocksdb_transactiondb_merge( self.inner, writeopts.inner, key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, )); Ok(()) } } pub fn merge_cf_opt( &self, cf: &impl AsColumnFamilyRef, key: K, value: V, writeopts: &WriteOptions, ) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi_try!(ffi::rocksdb_transactiondb_merge_cf( self.inner, writeopts.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, )); Ok(()) } } pub fn delete>(&self, key: K) -> Result<(), Error> { self.delete_opt(key, &WriteOptions::default()) } pub fn delete_cf>( &self, cf: &impl AsColumnFamilyRef, key: K, ) -> Result<(), Error> { self.delete_cf_opt(cf, key, &WriteOptions::default()) } pub fn delete_opt>( &self, key: K, writeopts: &WriteOptions, ) -> Result<(), Error> { let key = key.as_ref(); unsafe { ffi_try!(ffi::rocksdb_transactiondb_delete( self.inner, writeopts.inner, key.as_ptr() as *const c_char, key.len() as size_t, )); } Ok(()) } pub fn delete_cf_opt>( &self, cf: &impl AsColumnFamilyRef, key: K, writeopts: &WriteOptions, ) -> Result<(), Error> { let key = key.as_ref(); unsafe { ffi_try!(ffi::rocksdb_transactiondb_delete_cf( self.inner, writeopts.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, )); } Ok(()) } pub fn iterator<'a: 'b, 'b>( &'a self, mode: IteratorMode, ) -> DBIteratorWithThreadMode<'b, Self> { let readopts = ReadOptions::default(); self.iterator_opt(mode, readopts) } pub fn iterator_opt<'a: 'b, 'b>( &'a self, mode: IteratorMode, readopts: ReadOptions, ) -> DBIteratorWithThreadMode<'b, Self> { DBIteratorWithThreadMode::new(self, readopts, mode) } /// Opens an iterator using the provided ReadOptions. /// This is used when you want to iterate over a specific ColumnFamily with a modified ReadOptions pub fn iterator_cf_opt<'a: 'b, 'b>( &'a self, cf_handle: &impl AsColumnFamilyRef, readopts: ReadOptions, mode: IteratorMode, ) -> DBIteratorWithThreadMode<'b, Self> { DBIteratorWithThreadMode::new_cf(self, cf_handle.inner(), readopts, mode) } /// Opens an iterator with `set_total_order_seek` enabled. /// This must be used to iterate across prefixes when `set_memtable_factory` has been called /// with a Hash-based implementation. pub fn full_iterator<'a: 'b, 'b>( &'a self, mode: IteratorMode, ) -> DBIteratorWithThreadMode<'b, Self> { let mut opts = ReadOptions::default(); opts.set_total_order_seek(true); DBIteratorWithThreadMode::new(self, opts, mode) } pub fn prefix_iterator<'a: 'b, 'b, P: AsRef<[u8]>>( &'a self, prefix: P, ) -> DBIteratorWithThreadMode<'b, Self> { let mut opts = ReadOptions::default(); opts.set_prefix_same_as_start(true); DBIteratorWithThreadMode::new( self, opts, IteratorMode::From(prefix.as_ref(), Direction::Forward), ) } pub fn iterator_cf<'a: 'b, 'b>( &'a self, cf_handle: &impl AsColumnFamilyRef, mode: IteratorMode, ) -> DBIteratorWithThreadMode<'b, Self> { let opts = ReadOptions::default(); DBIteratorWithThreadMode::new_cf(self, cf_handle.inner(), opts, mode) } pub fn full_iterator_cf<'a: 'b, 'b>( &'a self, cf_handle: &impl AsColumnFamilyRef, mode: IteratorMode, ) -> DBIteratorWithThreadMode<'b, Self> { let mut opts = ReadOptions::default(); opts.set_total_order_seek(true); DBIteratorWithThreadMode::new_cf(self, cf_handle.inner(), opts, mode) } pub fn prefix_iterator_cf<'a, P: AsRef<[u8]>>( &'a self, cf_handle: &impl AsColumnFamilyRef, prefix: P, ) -> DBIteratorWithThreadMode<'a, Self> { let mut opts = ReadOptions::default(); opts.set_prefix_same_as_start(true); DBIteratorWithThreadMode::<'a, Self>::new_cf( self, cf_handle.inner(), opts, IteratorMode::From(prefix.as_ref(), Direction::Forward), ) } /// Opens a raw iterator over the database, using the default read options pub fn raw_iterator<'a: 'b, 'b>(&'a self) -> DBRawIteratorWithThreadMode<'b, Self> { let opts = ReadOptions::default(); DBRawIteratorWithThreadMode::new(self, opts) } /// Opens a raw iterator over the given column family, using the default read options pub fn raw_iterator_cf<'a: 'b, 'b>( &'a self, cf_handle: &impl AsColumnFamilyRef, ) -> DBRawIteratorWithThreadMode<'b, Self> { let opts = ReadOptions::default(); DBRawIteratorWithThreadMode::new_cf(self, cf_handle.inner(), opts) } /// Opens a raw iterator over the database, using the given read options pub fn raw_iterator_opt<'a: 'b, 'b>( &'a self, readopts: ReadOptions, ) -> DBRawIteratorWithThreadMode<'b, Self> { DBRawIteratorWithThreadMode::new(self, readopts) } /// Opens a raw iterator over the given column family, using the given read options pub fn raw_iterator_cf_opt<'a: 'b, 'b>( &'a self, cf_handle: &impl AsColumnFamilyRef, readopts: ReadOptions, ) -> DBRawIteratorWithThreadMode<'b, Self> { DBRawIteratorWithThreadMode::new_cf(self, cf_handle.inner(), readopts) } pub fn snapshot(&self) -> SnapshotWithThreadMode { SnapshotWithThreadMode::::new(self) } fn drop_column_family( &self, cf_inner: *mut ffi::rocksdb_column_family_handle_t, _cf: C, ) -> Result<(), Error> { unsafe { // first mark the column family as dropped ffi_try!(ffi::rocksdb_drop_column_family( self.inner as *mut ffi::rocksdb_t, cf_inner )); } // Since `_cf` is dropped here, the column family handle is destroyed // and any resources (mem, files) are reclaimed. Ok(()) } } impl TransactionDB { /// Creates column family with given name and options. pub fn create_cf>(&mut self, name: N, opts: &Options) -> Result<(), Error> { let inner = self.create_inner_cf_handle(name.as_ref(), opts)?; self.cfs .cfs .insert(name.as_ref().to_string(), ColumnFamily { inner }); Ok(()) } /// Returns the underlying column family handle. pub fn cf_handle(&self, name: &str) -> Option<&ColumnFamily> { self.cfs.cfs.get(name) } /// Drops the column family with the given name pub fn drop_cf(&mut self, name: &str) -> Result<(), Error> { if let Some(cf) = self.cfs.cfs.remove(name) { self.drop_column_family(cf.inner, cf) } else { Err(Error::new(format!("Invalid column family: {name}"))) } } } impl TransactionDB { /// Creates column family with given name and options. pub fn create_cf>(&self, name: N, opts: &Options) -> Result<(), Error> { // Note that we acquire the cfs lock before inserting: otherwise we might race // another caller who observed the handle as missing. let mut cfs = self.cfs.cfs.write().unwrap(); let inner = self.create_inner_cf_handle(name.as_ref(), opts)?; cfs.insert( name.as_ref().to_string(), Arc::new(UnboundColumnFamily { inner }), ); Ok(()) } /// Returns the underlying column family handle. pub fn cf_handle(&self, name: &str) -> Option> { self.cfs .cfs .read() .unwrap() .get(name) .cloned() .map(UnboundColumnFamily::bound_column_family) } /// Drops the column family with the given name by internally locking the inner column /// family map. This avoids needing `&mut self` reference pub fn drop_cf(&self, name: &str) -> Result<(), Error> { if let Some(cf) = self.cfs.cfs.write().unwrap().remove(name) { self.drop_column_family(cf.inner, cf) } else { Err(Error::new(format!("Invalid column family: {name}"))) } } /// Implementation for property_value et al methods. /// /// `name` is the name of the property. It will be converted into a CString /// and passed to `get_property` as argument. `get_property` reads the /// specified property and either returns NULL or a pointer to a C allocated /// string; this method takes ownership of that string and will free it at /// the end. That string is parsed using `parse` callback which produces /// the returned result. fn property_value_impl( name: impl CStrLike, get_property: impl FnOnce(*const c_char) -> *mut c_char, parse: impl FnOnce(&str) -> Result, ) -> Result, Error> { let value = match name.bake() { Ok(prop_name) => get_property(prop_name.as_ptr()), Err(e) => { return Err(Error::new(format!( "Failed to convert property name to CString: {e}" ))); } }; if value.is_null() { return Ok(None); } let result = match unsafe { CStr::from_ptr(value) }.to_str() { Ok(s) => parse(s).map(|value| Some(value)), Err(e) => Err(Error::new(format!( "Failed to convert property value to string: {e}" ))), }; unsafe { ffi::rocksdb_free(value as *mut c_void); } result } /// Retrieves a RocksDB property by name. /// /// Full list of properties could be find /// [here](https://github.com/facebook/rocksdb/blob/08809f5e6cd9cc4bc3958dd4d59457ae78c76660/include/rocksdb/db.h#L428-L634). pub fn property_value(&self, name: impl CStrLike) -> Result, Error> { Self::property_value_impl( name, |prop_name| unsafe { ffi::rocksdb_transactiondb_property_value(self.inner, prop_name) }, |str_value| Ok(str_value.to_owned()), ) } fn parse_property_int_value(value: &str) -> Result { value.parse::().map_err(|err| { Error::new(format!( "Failed to convert property value {value} to int: {err}" )) }) } /// Retrieves a RocksDB property and casts it to an integer. /// /// Full list of properties that return int values could be find /// [here](https://github.com/facebook/rocksdb/blob/08809f5e6cd9cc4bc3958dd4d59457ae78c76660/include/rocksdb/db.h#L654-L689). pub fn property_int_value(&self, name: impl CStrLike) -> Result, Error> { Self::property_value_impl( name, |prop_name| unsafe { ffi::rocksdb_transactiondb_property_value(self.inner, prop_name) }, Self::parse_property_int_value, ) } } impl Drop for TransactionDB { fn drop(&mut self) { unsafe { self.prepared_transactions().clear(); self.cfs.drop_all_cfs_internal(); ffi::rocksdb_transactiondb_close(self.inner); } } } rocksdb-0.23.0/src/write_batch.rs000064400000000000000000000312701046102023000147700ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. use crate::{ffi, AsColumnFamilyRef}; use libc::{c_char, c_void, size_t}; use std::slice; /// A type alias to keep compatibility. See [`WriteBatchWithTransaction`] for details pub type WriteBatch = WriteBatchWithTransaction; /// An atomic batch of write operations. /// /// [`delete_range`](#method.delete_range) is not supported in [`Transaction`]. /// /// Making an atomic commit of several writes: /// /// ``` /// use rocksdb::{DB, Options, WriteBatchWithTransaction}; /// /// let tempdir = tempfile::Builder::new() /// .prefix("_path_for_rocksdb_storage1") /// .tempdir() /// .expect("Failed to create temporary path for the _path_for_rocksdb_storage1"); /// let path = tempdir.path(); /// { /// let db = DB::open_default(path).unwrap(); /// let mut batch = WriteBatchWithTransaction::::default(); /// batch.put(b"my key", b"my value"); /// batch.put(b"key2", b"value2"); /// batch.put(b"key3", b"value3"); /// /// // delete_range is supported when use without transaction /// batch.delete_range(b"key2", b"key3"); /// /// db.write(batch); // Atomically commits the batch /// } /// let _ = DB::destroy(&Options::default(), path); /// ``` /// /// [`Transaction`]: crate::Transaction pub struct WriteBatchWithTransaction { pub(crate) inner: *mut ffi::rocksdb_writebatch_t, } /// Receives the puts and deletes of a write batch. /// /// The application must provide an implementation of this trait when /// iterating the operations within a `WriteBatch` pub trait WriteBatchIterator { /// Called with a key and value that were `put` into the batch. fn put(&mut self, key: Box<[u8]>, value: Box<[u8]>); /// Called with a key that was `delete`d from the batch. fn delete(&mut self, key: Box<[u8]>); } unsafe extern "C" fn writebatch_put_callback( state: *mut c_void, k: *const c_char, klen: usize, v: *const c_char, vlen: usize, ) { // coerce the raw pointer back into a box, but "leak" it so we prevent // freeing the resource before we are done with it let boxed_cb = Box::from_raw(state as *mut &mut dyn WriteBatchIterator); let leaked_cb = Box::leak(boxed_cb); let key = slice::from_raw_parts(k as *const u8, klen); let value = slice::from_raw_parts(v as *const u8, vlen); leaked_cb.put( key.to_vec().into_boxed_slice(), value.to_vec().into_boxed_slice(), ); } unsafe extern "C" fn writebatch_delete_callback(state: *mut c_void, k: *const c_char, klen: usize) { // coerce the raw pointer back into a box, but "leak" it so we prevent // freeing the resource before we are done with it let boxed_cb = Box::from_raw(state as *mut &mut dyn WriteBatchIterator); let leaked_cb = Box::leak(boxed_cb); let key = slice::from_raw_parts(k as *const u8, klen); leaked_cb.delete(key.to_vec().into_boxed_slice()); } impl WriteBatchWithTransaction { /// Create a new `WriteBatch` without allocating memory. pub fn new() -> Self { Self { inner: unsafe { ffi::rocksdb_writebatch_create() }, } } /// Creates `WriteBatch` with the specified `capacity` in bytes. Allocates immediately. pub fn with_capacity_bytes(capacity_bytes: usize) -> Self { Self { // zeroes from default constructor // https://github.com/facebook/rocksdb/blob/0f35db55d86ea8699ea936c9e2a4e34c82458d6b/include/rocksdb/write_batch.h#L66 inner: unsafe { ffi::rocksdb_writebatch_create_with_params(capacity_bytes, 0, 0, 0) }, } } /// Construct with a reference to a byte array serialized by [`WriteBatch`]. pub fn from_data(data: &[u8]) -> Self { unsafe { let ptr = data.as_ptr(); let len = data.len(); Self { inner: ffi::rocksdb_writebatch_create_from( ptr as *const libc::c_char, len as size_t, ), } } } pub fn len(&self) -> usize { unsafe { ffi::rocksdb_writebatch_count(self.inner) as usize } } /// Return WriteBatch serialized size (in bytes). pub fn size_in_bytes(&self) -> usize { unsafe { let mut batch_size: size_t = 0; ffi::rocksdb_writebatch_data(self.inner, &mut batch_size); batch_size } } /// Return a reference to a byte array which represents a serialized version of the batch. pub fn data(&self) -> &[u8] { unsafe { let mut batch_size: size_t = 0; let batch_data = ffi::rocksdb_writebatch_data(self.inner, &mut batch_size); std::slice::from_raw_parts(batch_data as _, batch_size) } } pub fn is_empty(&self) -> bool { self.len() == 0 } /// Iterate the put and delete operations within this write batch. Note that /// this does _not_ return an `Iterator` but instead will invoke the `put()` /// and `delete()` member functions of the provided `WriteBatchIterator` /// trait implementation. pub fn iterate(&self, callbacks: &mut dyn WriteBatchIterator) { let state = Box::into_raw(Box::new(callbacks)); unsafe { ffi::rocksdb_writebatch_iterate( self.inner, state as *mut c_void, Some(writebatch_put_callback), Some(writebatch_delete_callback), ); // we must manually set the raw box free since there is no // associated "destroy" callback for this object drop(Box::from_raw(state)); } } /// Insert a value into the database under the given key. pub fn put(&mut self, key: K, value: V) where K: AsRef<[u8]>, V: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi::rocksdb_writebatch_put( self.inner, key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, ); } } /// Insert a value into the specific column family of the database under the given key. pub fn put_cf(&mut self, cf: &impl AsColumnFamilyRef, key: K, value: V) where K: AsRef<[u8]>, V: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi::rocksdb_writebatch_put_cf( self.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, ); } } /// Insert a value into the specific column family of the database /// under the given key with timestamp. pub fn put_cf_with_ts(&mut self, cf: &impl AsColumnFamilyRef, key: K, ts: S, value: V) where K: AsRef<[u8]>, V: AsRef<[u8]>, S: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); let ts = ts.as_ref(); unsafe { ffi::rocksdb_writebatch_put_cf_with_ts( self.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, ts.as_ptr() as *const c_char, ts.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, ); } } pub fn merge(&mut self, key: K, value: V) where K: AsRef<[u8]>, V: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi::rocksdb_writebatch_merge( self.inner, key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, ); } } pub fn merge_cf(&mut self, cf: &impl AsColumnFamilyRef, key: K, value: V) where K: AsRef<[u8]>, V: AsRef<[u8]>, { let key = key.as_ref(); let value = value.as_ref(); unsafe { ffi::rocksdb_writebatch_merge_cf( self.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, value.as_ptr() as *const c_char, value.len() as size_t, ); } } /// Removes the database entry for key. Does nothing if the key was not found. pub fn delete>(&mut self, key: K) { let key = key.as_ref(); unsafe { ffi::rocksdb_writebatch_delete( self.inner, key.as_ptr() as *const c_char, key.len() as size_t, ); } } /// Removes the database entry in the specific column family for key. /// Does nothing if the key was not found. pub fn delete_cf>(&mut self, cf: &impl AsColumnFamilyRef, key: K) { let key = key.as_ref(); unsafe { ffi::rocksdb_writebatch_delete_cf( self.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, ); } } /// Removes the database entry in the specific column family with timestamp for key. /// Does nothing if the key was not found. pub fn delete_cf_with_ts, S: AsRef<[u8]>>( &mut self, cf: &impl AsColumnFamilyRef, key: K, ts: S, ) { let key = key.as_ref(); let ts = ts.as_ref(); unsafe { ffi::rocksdb_writebatch_delete_cf_with_ts( self.inner, cf.inner(), key.as_ptr() as *const c_char, key.len() as size_t, ts.as_ptr() as *const c_char, ts.len() as size_t, ); } } /// Clear all updates buffered in this batch. pub fn clear(&mut self) { unsafe { ffi::rocksdb_writebatch_clear(self.inner); } } } impl WriteBatchWithTransaction { /// Remove database entries from start key to end key. /// /// Removes the database entries in the range ["begin_key", "end_key"), i.e., /// including "begin_key" and excluding "end_key". It is not an error if no /// keys exist in the range ["begin_key", "end_key"). pub fn delete_range>(&mut self, from: K, to: K) { let (start_key, end_key) = (from.as_ref(), to.as_ref()); unsafe { ffi::rocksdb_writebatch_delete_range( self.inner, start_key.as_ptr() as *const c_char, start_key.len() as size_t, end_key.as_ptr() as *const c_char, end_key.len() as size_t, ); } } /// Remove database entries in column family from start key to end key. /// /// Removes the database entries in the range ["begin_key", "end_key"), i.e., /// including "begin_key" and excluding "end_key". It is not an error if no /// keys exist in the range ["begin_key", "end_key"). pub fn delete_range_cf>(&mut self, cf: &impl AsColumnFamilyRef, from: K, to: K) { let (start_key, end_key) = (from.as_ref(), to.as_ref()); unsafe { ffi::rocksdb_writebatch_delete_range_cf( self.inner, cf.inner(), start_key.as_ptr() as *const c_char, start_key.len() as size_t, end_key.as_ptr() as *const c_char, end_key.len() as size_t, ); } } } impl Default for WriteBatchWithTransaction { fn default() -> Self { Self::new() } } impl Drop for WriteBatchWithTransaction { fn drop(&mut self) { unsafe { ffi::rocksdb_writebatch_destroy(self.inner); } } } unsafe impl Send for WriteBatchWithTransaction {} rocksdb-0.23.0/tests/fail/checkpoint_outlive_db.rs000064400000000000000000000002511046102023000203210ustar 00000000000000use rocksdb::{DB, checkpoint::Checkpoint}; fn main() { let _checkpoint = { let db = DB::open_default("foo").unwrap(); Checkpoint::new(&db) }; } rocksdb-0.23.0/tests/fail/checkpoint_outlive_db.stderr000064400000000000000000000006671046102023000212130ustar 00000000000000error[E0597]: `db` does not live long enough --> tests/fail/checkpoint_outlive_db.rs:6:25 | 4 | let _checkpoint = { | ----------- borrow later stored here 5 | let db = DB::open_default("foo").unwrap(); | -- binding `db` declared here 6 | Checkpoint::new(&db) | ^^^ borrowed value does not live long enough 7 | }; | - `db` dropped here while still borrowed rocksdb-0.23.0/tests/fail/iterator_outlive_db.rs000064400000000000000000000002451046102023000200260ustar 00000000000000use rocksdb::{IteratorMode, DB}; fn main() { let _iter = { let db = DB::open_default("foo").unwrap(); db.iterator(IteratorMode::Start) }; } rocksdb-0.23.0/tests/fail/iterator_outlive_db.stderr000064400000000000000000000007011046102023000207020ustar 00000000000000error[E0597]: `db` does not live long enough --> tests/fail/iterator_outlive_db.rs:6:9 | 4 | let _iter = { | ----- borrow later stored here 5 | let db = DB::open_default("foo").unwrap(); | -- binding `db` declared here 6 | db.iterator(IteratorMode::Start) | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ borrowed value does not live long enough 7 | }; | - `db` dropped here while still borrowed rocksdb-0.23.0/tests/fail/open_with_multiple_refs_as_single_threaded.rs000064400000000000000000000005201046102023000245670ustar 00000000000000use rocksdb::{SingleThreaded, DBWithThreadMode, Options}; fn main() { let db = DBWithThreadMode::::open_default("/path/to/dummy").unwrap(); let db_ref1 = &db; let db_ref2 = &db; let opts = Options::default(); db_ref1.create_cf("cf1", &opts).unwrap(); db_ref2.create_cf("cf2", &opts).unwrap(); } rocksdb-0.23.0/tests/fail/open_with_multiple_refs_as_single_threaded.stderr000064400000000000000000000016031046102023000254510ustar 00000000000000error[E0596]: cannot borrow `*db_ref1` as mutable, as it is behind a `&` reference --> tests/fail/open_with_multiple_refs_as_single_threaded.rs:8:5 | 8 | db_ref1.create_cf("cf1", &opts).unwrap(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ `db_ref1` is a `&` reference, so the data it refers to cannot be borrowed as mutable | help: consider changing this to be a mutable reference | 5 | let db_ref1 = &mut db; | +++ error[E0596]: cannot borrow `*db_ref2` as mutable, as it is behind a `&` reference --> tests/fail/open_with_multiple_refs_as_single_threaded.rs:9:5 | 9 | db_ref2.create_cf("cf2", &opts).unwrap(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ `db_ref2` is a `&` reference, so the data it refers to cannot be borrowed as mutable | help: consider changing this to be a mutable reference | 6 | let db_ref2 = &mut db; | +++ rocksdb-0.23.0/tests/fail/snapshot_outlive_db.rs000064400000000000000000000002061046102023000200310ustar 00000000000000use rocksdb::DB; fn main() { let _snapshot = { let db = DB::open_default("foo").unwrap(); db.snapshot() }; } rocksdb-0.23.0/tests/fail/snapshot_outlive_db.stderr000064400000000000000000000006431046102023000207150ustar 00000000000000error[E0597]: `db` does not live long enough --> tests/fail/snapshot_outlive_db.rs:6:9 | 4 | let _snapshot = { | --------- borrow later stored here 5 | let db = DB::open_default("foo").unwrap(); | -- binding `db` declared here 6 | db.snapshot() | ^^^^^^^^^^^^^ borrowed value does not live long enough 7 | }; | - `db` dropped here while still borrowed rocksdb-0.23.0/tests/fail/snapshot_outlive_transaction.rs000064400000000000000000000003401046102023000217700ustar 00000000000000use rocksdb::{TransactionDB, SingleThreaded}; fn main() { let db = TransactionDB::::open_default("foo").unwrap(); let _snapshot = { let txn = db.transaction(); txn.snapshot() }; }rocksdb-0.23.0/tests/fail/snapshot_outlive_transaction.stderr000064400000000000000000000006431046102023000226550ustar 00000000000000error[E0597]: `txn` does not live long enough --> tests/fail/snapshot_outlive_transaction.rs:7:9 | 5 | let _snapshot = { | --------- borrow later stored here 6 | let txn = db.transaction(); | --- binding `txn` declared here 7 | txn.snapshot() | ^^^^^^^^^^^^^^ borrowed value does not live long enough 8 | }; | - `txn` dropped here while still borrowed rocksdb-0.23.0/tests/fail/snapshot_outlive_transaction_db.rs000064400000000000000000000002771046102023000224460ustar 00000000000000use rocksdb::{TransactionDB, SingleThreaded}; fn main() { let _snapshot = { let db = TransactionDB::::open_default("foo").unwrap(); db.snapshot() }; }rocksdb-0.23.0/tests/fail/snapshot_outlive_transaction_db.stderr000064400000000000000000000007141046102023000233210ustar 00000000000000error[E0597]: `db` does not live long enough --> tests/fail/snapshot_outlive_transaction_db.rs:6:9 | 4 | let _snapshot = { | --------- borrow later stored here 5 | let db = TransactionDB::::open_default("foo").unwrap(); | -- binding `db` declared here 6 | db.snapshot() | ^^^^^^^^^^^^^ borrowed value does not live long enough 7 | }; | - `db` dropped here while still borrowed rocksdb-0.23.0/tests/fail/transaction_outlive_transaction_db.rs000064400000000000000000000002751046102023000231320ustar 00000000000000use rocksdb::{TransactionDB, SingleThreaded}; fn main() { let _txn = { let db = TransactionDB::::open_default("foo").unwrap(); db.transaction() }; }rocksdb-0.23.0/tests/fail/transaction_outlive_transaction_db.stderr000064400000000000000000000007131046102023000240060ustar 00000000000000error[E0597]: `db` does not live long enough --> tests/fail/transaction_outlive_transaction_db.rs:6:9 | 4 | let _txn = { | ---- borrow later stored here 5 | let db = TransactionDB::::open_default("foo").unwrap(); | -- binding `db` declared here 6 | db.transaction() | ^^^^^^^^^^^^^^^^ borrowed value does not live long enough 7 | }; | - `db` dropped here while still borrowed rocksdb-0.23.0/tests/test_backup.rs000064400000000000000000000100611046102023000153470ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. mod util; use pretty_assertions::assert_eq; use rocksdb::{ backup::{BackupEngine, BackupEngineOptions, RestoreOptions}, Env, DB, }; use util::DBPath; #[test] fn restore_from_latest() { // create backup let path = DBPath::new("restore_from_latest_test"); let restore_path = DBPath::new("restore_from_latest_path"); { let db = DB::open_default(&path).unwrap(); assert!(db.put(b"k1", b"v1111").is_ok()); let value = db.get(b"k1"); assert_eq!(value.unwrap().unwrap(), b"v1111"); { let backup_path = DBPath::new("restore_from_latest_test_backup"); let env = Env::new().unwrap(); let backup_opts = BackupEngineOptions::new(&backup_path).unwrap(); let mut backup_engine = BackupEngine::open(&backup_opts, &env).unwrap(); assert!(backup_engine.create_new_backup(&db).is_ok()); // check backup info let info = backup_engine.get_backup_info(); assert!(!info.is_empty()); info.iter().for_each(|i| { assert!(backup_engine.verify_backup(i.backup_id).is_ok()); assert!(i.size > 0); }); let mut restore_option = RestoreOptions::default(); restore_option.set_keep_log_files(false); // true to keep log files let restore_status = backup_engine.restore_from_latest_backup( &restore_path, &restore_path, &restore_option, ); assert!(restore_status.is_ok()); let db_restore = DB::open_default(&restore_path).unwrap(); let value = db_restore.get(b"k1"); assert_eq!(value.unwrap().unwrap(), b"v1111"); } } } #[test] fn restore_from_backup() { // create backup let path = DBPath::new("restore_from_backup_test"); let restore_path = DBPath::new("restore_from_backup_path"); { let db = DB::open_default(&path).unwrap(); assert!(db.put(b"k1", b"v1111").is_ok()); let value = db.get(b"k1"); assert_eq!(value.unwrap().unwrap(), b"v1111"); { let backup_path = DBPath::new("restore_from_latest_test_backup"); let env = Env::new().unwrap(); let backup_opts = BackupEngineOptions::new(&backup_path).unwrap(); let mut backup_engine = BackupEngine::open(&backup_opts, &env).unwrap(); assert!(backup_engine.create_new_backup(&db).is_ok()); // check backup info let info = backup_engine.get_backup_info(); assert!(!info.is_empty()); info.iter().for_each(|i| { assert!(backup_engine.verify_backup(i.backup_id).is_ok()); assert!(i.size > 0); }); let backup_id = info.first().unwrap().backup_id; let mut restore_option = RestoreOptions::default(); restore_option.set_keep_log_files(false); // true to keep log files let restore_status = backup_engine.restore_from_backup( &restore_path, &restore_path, &restore_option, backup_id, ); assert!(restore_status.is_ok()); let db_restore = DB::open_default(&restore_path).unwrap(); let value = db_restore.get(b"k1"); assert_eq!(value.unwrap().unwrap(), b"v1111"); } } } fn assert_send_generic() {} #[test] fn assert_send() { assert_send_generic::(); } rocksdb-0.23.0/tests/test_checkpoint.rs000064400000000000000000000065771046102023000162520ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. mod util; use pretty_assertions::assert_eq; use rocksdb::{checkpoint::Checkpoint, Options, DB}; use util::DBPath; #[test] pub fn test_single_checkpoint() { const PATH_PREFIX: &str = "_rust_rocksdb_cp_single_"; // Create DB with some data let db_path = DBPath::new(&format!("{PATH_PREFIX}db1")); let mut opts = Options::default(); opts.create_if_missing(true); let db = DB::open(&opts, &db_path).unwrap(); db.put(b"k1", b"v1").unwrap(); db.put(b"k2", b"v2").unwrap(); db.put(b"k3", b"v3").unwrap(); db.put(b"k4", b"v4").unwrap(); // Create checkpoint let cp1 = Checkpoint::new(&db).unwrap(); let cp1_path = DBPath::new(&format!("{PATH_PREFIX}cp1")); cp1.create_checkpoint(&cp1_path).unwrap(); // Verify checkpoint let cp = DB::open_default(&cp1_path).unwrap(); assert_eq!(cp.get(b"k1").unwrap().unwrap(), b"v1"); assert_eq!(cp.get(b"k2").unwrap().unwrap(), b"v2"); assert_eq!(cp.get(b"k3").unwrap().unwrap(), b"v3"); assert_eq!(cp.get(b"k4").unwrap().unwrap(), b"v4"); } #[test] pub fn test_multi_checkpoints() { const PATH_PREFIX: &str = "_rust_rocksdb_cp_multi_"; // Create DB with some data let db_path = DBPath::new(&format!("{PATH_PREFIX}db1")); let mut opts = Options::default(); opts.create_if_missing(true); let db = DB::open(&opts, &db_path).unwrap(); db.put(b"k1", b"v1").unwrap(); db.put(b"k2", b"v2").unwrap(); db.put(b"k3", b"v3").unwrap(); db.put(b"k4", b"v4").unwrap(); // Create first checkpoint let cp1 = Checkpoint::new(&db).unwrap(); let cp1_path = DBPath::new(&format!("{PATH_PREFIX}cp1")); cp1.create_checkpoint(&cp1_path).unwrap(); // Verify checkpoint let cp = DB::open_default(&cp1_path).unwrap(); assert_eq!(cp.get(b"k1").unwrap().unwrap(), b"v1"); assert_eq!(cp.get(b"k2").unwrap().unwrap(), b"v2"); assert_eq!(cp.get(b"k3").unwrap().unwrap(), b"v3"); assert_eq!(cp.get(b"k4").unwrap().unwrap(), b"v4"); // Change some existing keys db.put(b"k1", b"modified").unwrap(); db.put(b"k2", b"changed").unwrap(); // Add some new keys db.put(b"k5", b"v5").unwrap(); db.put(b"k6", b"v6").unwrap(); // Create another checkpoint let cp2 = Checkpoint::new(&db).unwrap(); let cp2_path = DBPath::new(&format!("{PATH_PREFIX}cp2")); cp2.create_checkpoint(&cp2_path).unwrap(); // Verify second checkpoint let cp = DB::open_default(&cp2_path).unwrap(); assert_eq!(cp.get(b"k1").unwrap().unwrap(), b"modified"); assert_eq!(cp.get(b"k2").unwrap().unwrap(), b"changed"); assert_eq!(cp.get(b"k5").unwrap().unwrap(), b"v5"); assert_eq!(cp.get(b"k6").unwrap().unwrap(), b"v6"); } #[test] fn test_checkpoint_outlive_db() { let t = trybuild::TestCases::new(); t.compile_fail("tests/fail/checkpoint_outlive_db.rs"); } rocksdb-0.23.0/tests/test_column_family.rs000064400000000000000000000407221046102023000167470ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. mod util; use pretty_assertions::assert_eq; use rocksdb::{ColumnFamilyDescriptor, MergeOperands, Options, DB, DEFAULT_COLUMN_FAMILY_NAME}; use rocksdb::{TransactionDB, TransactionDBOptions}; use util::DBPath; use std::fs; use std::io; use std::path::Path; #[cfg(feature = "multi-threaded-cf")] use rocksdb::MultiThreaded; #[cfg(not(feature = "multi-threaded-cf"))] use rocksdb::SingleThreaded; fn dir_size(path: impl AsRef) -> io::Result { fn dir_size(mut dir: fs::ReadDir) -> io::Result { dir.try_fold(0, |acc, file| { let file = file?; let size = match file.metadata()? { data if data.is_dir() => dir_size(fs::read_dir(file.path())?)?, data => data.len(), }; Ok(acc + size) }) } dir_size(fs::read_dir(path)?) } #[test] fn test_column_family() { let n = DBPath::new("_rust_rocksdb_cftest"); // should be able to create column families { let mut opts = Options::default(); opts.create_if_missing(true); opts.set_merge_operator_associative("test operator", test_provided_merge); #[cfg(feature = "multi-threaded-cf")] let db = DB::open(&opts, &n).unwrap(); #[cfg(not(feature = "multi-threaded-cf"))] let mut db = DB::open(&opts, &n).unwrap(); let opts = Options::default(); match db.create_cf("cf1", &opts) { Ok(()) => println!("cf1 created successfully"), Err(e) => { panic!("could not create column family: {}", e); } } } // should fail to open db without specifying same column families { let mut opts = Options::default(); opts.set_merge_operator_associative("test operator", test_provided_merge); match DB::open(&opts, &n) { Ok(_db) => panic!( "should not have opened DB successfully without \ specifying column families" ), Err(e) => assert!(e.to_string().starts_with("Invalid argument")), } } // should properly open db when specifying all column families { let mut opts = Options::default(); opts.set_merge_operator_associative("test operator", test_provided_merge); match DB::open_cf(&opts, &n, ["cf1"]) { Ok(_db) => println!("successfully opened db with column family"), Err(e) => panic!("failed to open db with column family: {}", e), } } // should be able to list a cf { let opts = Options::default(); let vec = DB::list_cf(&opts, &n); match vec { Ok(vec) => assert_eq!(vec, vec![DEFAULT_COLUMN_FAMILY_NAME, "cf1"]), Err(e) => panic!("failed to drop column family: {}", e), } } // TODO should be able to use writebatch ops with a cf {} // TODO should be able to iterate over a cf {} // should be able to drop a cf { #[cfg(feature = "multi-threaded-cf")] let db = DB::open_cf(&Options::default(), &n, ["cf1"]).unwrap(); #[cfg(not(feature = "multi-threaded-cf"))] let mut db = DB::open_cf(&Options::default(), &n, ["cf1"]).unwrap(); match db.drop_cf("cf1") { Ok(_) => println!("cf1 successfully dropped."), Err(e) => panic!("failed to drop column family: {}", e), } } } #[test] fn test_column_family_with_transactiondb() { let n = DBPath::new("_rust_rocksdb_cftest"); // should be able to create column families { let mut opts = Options::default(); opts.create_if_missing(true); opts.set_merge_operator_associative("test operator", test_provided_merge); #[cfg(feature = "multi-threaded-cf")] let db = TransactionDB::open(&opts, &TransactionDBOptions::default(), &n).unwrap(); #[cfg(not(feature = "multi-threaded-cf"))] let db = TransactionDB::open(&opts, &TransactionDBOptions::default(), &n).unwrap(); let opts = Options::default(); match db.create_cf("cf1", &opts) { Ok(()) => println!("cf1 created successfully"), Err(e) => { panic!("could not create column family: {}", e); } } } // should fail to open db without specifying same column families { let mut opts = Options::default(); opts.set_merge_operator_associative("test operator", test_provided_merge); #[cfg(feature = "multi-threaded-cf")] let db = TransactionDB::::open(&opts, &TransactionDBOptions::default(), &n); #[cfg(not(feature = "multi-threaded-cf"))] let db = TransactionDB::::open(&opts, &TransactionDBOptions::default(), &n); match db { Ok(_db) => panic!( "should not have opened TransactionDB successfully without \ specifying column families" ), Err(e) => assert!(e.to_string().starts_with("Invalid argument")), } } // should properly open db when specyfing all column families { let mut opts = Options::default(); opts.set_merge_operator_associative("test operator", test_provided_merge); let cfs = &["cf1"]; #[cfg(feature = "multi-threaded-cf")] let db = TransactionDB::::open_cf( &opts, &TransactionDBOptions::default(), &n, cfs, ); #[cfg(not(feature = "multi-threaded-cf"))] let db = TransactionDB::::open_cf( &opts, &TransactionDBOptions::default(), &n, cfs, ); match db { Ok(_db) => println!("successfully opened db with column family"), Err(e) => panic!("failed to open db with column family: {}", e), } } // should be able to list a cf { let opts = Options::default(); let vec = DB::list_cf(&opts, &n); match vec { Ok(vec) => assert_eq!(vec, vec![DEFAULT_COLUMN_FAMILY_NAME, "cf1"]), Err(e) => panic!("failed to drop column family: {}", e), } } // should be able to drop a cf { let opts = Options::default(); let cfs = &["cf1"]; #[cfg(feature = "multi-threaded-cf")] let db = TransactionDB::::open_cf( &opts, &TransactionDBOptions::default(), &n, cfs, ) .unwrap(); #[cfg(not(feature = "multi-threaded-cf"))] let mut db = TransactionDB::::open_cf( &opts, &TransactionDBOptions::default(), &n, cfs, ) .unwrap(); match db.drop_cf("cf1") { Ok(_) => println!("cf1 successfully dropped."), Err(e) => panic!("failed to drop column family: {}", e), } } // should not be able to open cf after dropping. { let opts = Options::default(); let cfs = &["cf1"]; #[cfg(feature = "multi-threaded-cf")] let db = TransactionDB::::open_cf( &opts, &TransactionDBOptions::default(), &n, cfs, ); #[cfg(not(feature = "multi-threaded-cf"))] let db = TransactionDB::::open_cf( &opts, &TransactionDBOptions::default(), &n, cfs, ); assert!(db.is_err()) } } #[test] fn test_can_open_db_with_results_of_list_cf() { // Test scenario derived from GitHub issue #175 and 177 let n = DBPath::new("_rust_rocksdb_cftest_with_list_cf"); { let mut opts = Options::default(); opts.create_if_missing(true); #[cfg(feature = "multi-threaded-cf")] let db = DB::open(&opts, &n).unwrap(); #[cfg(not(feature = "multi-threaded-cf"))] let mut db = DB::open(&opts, &n).unwrap(); let opts = Options::default(); assert!(db.create_cf("cf1", &opts).is_ok()); } { let options = Options::default(); let cfs = DB::list_cf(&options, &n).unwrap(); let db = DB::open_cf(&options, &n, cfs).unwrap(); assert!(db.cf_handle("cf1").is_some()); } } #[test] fn test_create_missing_column_family() { let n = DBPath::new("_rust_rocksdb_missing_cftest"); // should be able to create new column families when opening a new database { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); match DB::open_cf(&opts, &n, ["cf1"]) { Ok(_db) => println!("successfully created new column family"), Err(e) => panic!("failed to create new column family: {}", e), } } } #[test] fn test_open_column_family_with_opts() { let n = DBPath::new("_rust_rocksdb_open_cf_with_opts"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); // We can use different parameters for different column family. let mut cf1_opts = Options::default(); cf1_opts.set_min_write_buffer_number(2); cf1_opts.set_min_write_buffer_number_to_merge(4); let mut cf2_opts = Options::default(); cf2_opts.set_min_write_buffer_number(5); cf2_opts.set_min_write_buffer_number_to_merge(10); let cfs = vec![("cf1", cf1_opts), ("cf2", cf2_opts)]; match DB::open_cf_with_opts(&opts, &n, cfs) { Ok(_db) => println!("successfully opened column family with the specified options"), Err(e) => panic!("failed to open cf with options: {}", e), } } } #[test] #[ignore] fn test_merge_operator() { let n = DBPath::new("_rust_rocksdb_cftest_merge"); // TODO should be able to write, read, merge, batch, and iterate over a cf { let mut opts = Options::default(); opts.set_merge_operator_associative("test operator", test_provided_merge); let db = match DB::open_cf(&opts, &n, ["cf1"]) { Ok(db) => { println!("successfully opened db with column family"); db } Err(e) => panic!("failed to open db with column family: {}", e), }; let cf1 = db.cf_handle("cf1").unwrap(); assert!(db.put_cf(&cf1, b"k1", b"v1").is_ok()); assert_eq!(db.get_cf(&cf1, b"k1").unwrap().unwrap(), b"v1"); let p = db.put_cf(&cf1, b"k1", b"a"); assert!(p.is_ok()); db.merge_cf(&cf1, b"k1", b"b").unwrap(); db.merge_cf(&cf1, b"k1", b"c").unwrap(); db.merge_cf(&cf1, b"k1", b"d").unwrap(); db.merge_cf(&cf1, b"k1", b"efg").unwrap(); let m = db.merge_cf(&cf1, b"k1", b"h"); println!("m is {m:?}"); // TODO assert!(m.is_ok()); match db.get(b"k1") { Ok(Some(value)) => match std::str::from_utf8(&value) { Ok(v) => println!("retrieved utf8 value: {v}"), Err(_) => println!("did not read valid utf-8 out of the db"), }, Err(_) => println!("error reading value"), _ => panic!("value not present!"), } let _ = db.get_cf(&cf1, b"k1"); // TODO assert!(r.unwrap().as_ref() == b"abcdefgh"); assert!(db.delete(b"k1").is_ok()); assert!(db.get(b"k1").unwrap().is_none()); } } fn test_provided_merge( _: &[u8], existing_val: Option<&[u8]>, operands: &MergeOperands, ) -> Option> { let nops = operands.len(); let mut result: Vec = Vec::with_capacity(nops); if let Some(v) = existing_val { for e in v { result.push(*e); } } for op in operands { for e in op { result.push(*e); } } Some(result) } #[test] fn test_column_family_with_options() { let n = DBPath::new("_rust_rocksdb_cf_with_optionstest"); { let mut cfopts = Options::default(); cfopts.set_max_write_buffer_number(16); let cf_descriptor = ColumnFamilyDescriptor::new("cf1", cfopts); let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let cfs = vec![cf_descriptor]; match DB::open_cf_descriptors(&opts, &n, cfs) { Ok(_db) => println!("created db with column family descriptors successfully"), Err(e) => { panic!( "could not create new database with column family descriptors: {}", e ); } } } { let mut cfopts = Options::default(); cfopts.set_max_write_buffer_number(16); let cf_descriptor = ColumnFamilyDescriptor::new("cf1", cfopts); let opts = Options::default(); let cfs = vec![cf_descriptor]; match DB::open_cf_descriptors(&opts, &n, cfs) { Ok(_db) => println!("successfully re-opened database with column family descriptors"), Err(e) => { panic!( "unable to re-open database with column family descriptors: {}", e ); } } } } #[test] fn test_create_duplicate_column_family() { let n = DBPath::new("_rust_rocksdb_create_duplicate_column_family"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); #[cfg(feature = "multi-threaded-cf")] let db = DB::open_cf(&opts, &n, ["cf1"]).unwrap(); #[cfg(not(feature = "multi-threaded-cf"))] let mut db = DB::open_cf(&opts, &n, ["cf1"]).unwrap(); assert!(db.create_cf("cf1", &opts).is_err()); } } #[test] fn test_no_leaked_column_family() { let n = DBPath::new("_rust_rocksdb_no_leaked_column_family"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let mut write_options = rocksdb::WriteOptions::default(); write_options.set_sync(false); write_options.disable_wal(true); #[cfg(feature = "multi-threaded-cf")] let db = DB::open(&opts, &n).unwrap(); #[cfg(not(feature = "multi-threaded-cf"))] let mut db = DB::open(&opts, &n).unwrap(); #[cfg(feature = "multi-threaded-cf")] let mut outlived_cf = None; let large_blob = vec![0x20; 1024 * 1024]; // repeat creating and dropping cfs many times to indirectly detect // possible leak via large dir. for cf_index in 0..20 { let cf_name = format!("cf{cf_index}"); db.create_cf(&cf_name, &Options::default()).unwrap(); let cf = db.cf_handle(&cf_name).unwrap(); let mut batch = rocksdb::WriteBatch::default(); for key_index in 0..100 { batch.put_cf(&cf, format!("k{key_index}"), &large_blob); } db.write_opt(batch, &write_options).unwrap(); // force create an SST file db.flush_cf(&cf).unwrap(); db.drop_cf(&cf_name).unwrap(); #[cfg(feature = "multi-threaded-cf")] { outlived_cf = Some(cf); } } // if we're not leaking, the dir bytes should be well under 10M bytes in total let dir_bytes = dir_size(&n).unwrap(); let leak_msg = format!("{dir_bytes} is too large (maybe leaking...)"); assert!(dir_bytes < 10_000_000, "{}", leak_msg); // only if MultiThreaded, cf can outlive db.drop_cf() and shouldn't cause SEGV... #[cfg(feature = "multi-threaded-cf")] { let outlived_cf = outlived_cf.unwrap(); assert_eq!( &db.get_cf(&outlived_cf, "k0").unwrap().unwrap(), &large_blob ); drop(outlived_cf); } // make it explicit not to drop the db until we get dir size above... drop(db); } } rocksdb-0.23.0/tests/test_compactionfilter.rs000064400000000000000000000031071046102023000174470ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. mod util; use pretty_assertions::assert_eq; use rocksdb::{CompactionDecision, Options, DB}; use util::DBPath; #[cfg(test)] #[allow(unused_variables)] fn test_filter(level: u32, key: &[u8], value: &[u8]) -> CompactionDecision { use self::CompactionDecision::*; match key.first() { Some(&b'_') => Remove, Some(&b'%') => Change(b"secret"), _ => Keep, } } #[test] fn compaction_filter_test() { let path = DBPath::new("_rust_rocksdb_filter_test"); let mut opts = Options::default(); opts.create_if_missing(true); opts.set_compaction_filter("test", test_filter); { let db = DB::open(&opts, &path).unwrap(); let _ = db.put(b"k1", b"a"); let _ = db.put(b"_k", b"b"); let _ = db.put(b"%k", b"c"); db.compact_range(None::<&[u8]>, None::<&[u8]>); assert_eq!(&*db.get(b"k1").unwrap().unwrap(), b"a"); assert!(db.get(b"_k").unwrap().is_none()); assert_eq!(&*db.get(b"%k").unwrap().unwrap(), b"secret"); } } rocksdb-0.23.0/tests/test_comparator.rs000064400000000000000000000233141046102023000162560ustar 00000000000000mod util; use rocksdb::{CompactOptions, Options, ReadOptions, DB}; use std::cmp::Ordering; use std::iter::FromIterator; use util::{U64Comparator, U64Timestamp}; /// This function is for ensuring test of backwards compatibility pub fn rocks_old_compare(one: &[u8], two: &[u8]) -> Ordering { one.cmp(two) } type CompareFn = dyn Fn(&[u8], &[u8]) -> Ordering; /// create database add some values, and iterate over these pub fn write_to_db_with_comparator(compare_fn: Box) -> Vec { let mut result_vec = Vec::new(); let tempdir = tempfile::Builder::new() .prefix("_path_for_rocksdb_storage") .tempdir() .expect("Failed to create temporary path for the _path_for_rocksdb_storage"); let path = tempdir.path(); { let mut db_opts = Options::default(); db_opts.create_missing_column_families(true); db_opts.create_if_missing(true); db_opts.set_comparator("cname", compare_fn); let db = DB::open(&db_opts, path).unwrap(); db.put(b"a-key", b"a-value").unwrap(); db.put(b"b-key", b"b-value").unwrap(); let mut iter = db.raw_iterator(); iter.seek_to_first(); while iter.valid() { let key = iter.key().unwrap(); // maybe not best way to copy? let key_str = key.iter().map(|b| *b as char).collect::>(); result_vec.push(String::from_iter(key_str)); iter.next(); } } let _ = DB::destroy(&Options::default(), path); result_vec } #[test] /// First verify that using a function as a comparator works as expected /// This should verify backwards compatibility /// Then run a test with a clojure where an x-variable is passed /// Keep in mind that this variable must be moved to the clojure /// Then run a test with a reverse sorting clojure and make sure the order is reverted fn test_comparator() { let local_compare = move |one: &[u8], two: &[u8]| one.cmp(two); let x = 0; let local_compare_reverse = move |one: &[u8], two: &[u8]| { println!( "Use the x value from the closure scope to do something smart: {:?}", x ); match one.cmp(two) { Ordering::Less => Ordering::Greater, Ordering::Equal => Ordering::Equal, Ordering::Greater => Ordering::Less, } }; let old_res = write_to_db_with_comparator(Box::new(rocks_old_compare)); println!("Keys in normal sort order, no closure: {:?}", old_res); assert_eq!(vec!["a-key", "b-key"], old_res); let res_closure = write_to_db_with_comparator(Box::new(local_compare)); println!("Keys in normal sort order, closure: {:?}", res_closure); assert_eq!(res_closure, old_res); let res_closure_reverse = write_to_db_with_comparator(Box::new(local_compare_reverse)); println!( "Keys in reverse sort order, closure: {:?}", res_closure_reverse ); assert_eq!(vec!["b-key", "a-key"], res_closure_reverse); } #[test] fn test_comparator_with_ts() { let tempdir = tempfile::Builder::new() .prefix("_path_for_rocksdb_storage_with_ts") .tempdir() .expect("Failed to create temporary path for the _path_for_rocksdb_storage_with_ts."); let path = tempdir.path(); let _ = DB::destroy(&Options::default(), path); { let mut db_opts = Options::default(); db_opts.create_missing_column_families(true); db_opts.create_if_missing(true); db_opts.set_comparator_with_ts( U64Comparator::NAME, U64Timestamp::SIZE, Box::new(U64Comparator::compare), Box::new(U64Comparator::compare_ts), Box::new(U64Comparator::compare_without_ts), ); let db = DB::open(&db_opts, path).unwrap(); let key = b"hello"; let val1 = b"world0"; let val2 = b"world1"; let ts = U64Timestamp::new(1); let ts2 = U64Timestamp::new(2); let ts3 = U64Timestamp::new(3); let mut opts = ReadOptions::default(); opts.set_timestamp(ts); // basic put and get db.put_with_ts(key, ts, val1).unwrap(); let value = db.get_opt(key, &opts).unwrap(); assert_eq!(value.unwrap().as_slice(), val1); // update db.put_with_ts(key, ts2, val2).unwrap(); opts.set_timestamp(ts2); let value = db.get_opt(key, &opts).unwrap(); assert_eq!(value.unwrap().as_slice(), val2); // delete db.delete_with_ts(key, ts3).unwrap(); opts.set_timestamp(ts3); let value = db.get_opt(key, &opts).unwrap(); assert!(value.is_none()); // ts2 should read deleted data opts.set_timestamp(ts2); let value = db.get_opt(key, &opts).unwrap(); assert_eq!(value.unwrap().as_slice(), val2); // ts1 should read old data opts.set_timestamp(ts); let value = db.get_opt(key, &opts).unwrap(); assert_eq!(value.unwrap().as_slice(), val1); // test iterator with ts opts.set_timestamp(ts2); let mut iter = db.raw_iterator_opt(opts); iter.seek_to_first(); let mut result_vec = Vec::new(); while iter.valid() { let key = iter.key().unwrap(); // maybe not best way to copy? let key_str = key.iter().map(|b| *b as char).collect::>(); result_vec.push(String::from_iter(key_str)); iter.next(); } assert_eq!(result_vec, ["hello"]); // test full_history_ts_low works let mut compact_opts = CompactOptions::default(); compact_opts.set_full_history_ts_low(ts2); db.compact_range_opt(None::<&[u8]>, None::<&[u8]>, &compact_opts); db.flush().unwrap(); let mut opts = ReadOptions::default(); opts.set_timestamp(ts3); let value = db.get_opt(key, &opts).unwrap(); assert_eq!(value, None); // cannot read with timestamp older than full_history_ts_low opts.set_timestamp(ts); assert!(db.get_opt(key, &opts).is_err()); } let _ = DB::destroy(&Options::default(), path); } #[test] fn test_comparator_with_column_family_with_ts() { let tempdir = tempfile::Builder::new() .prefix("_path_for_rocksdb_storage_with_column_family_with_ts") .tempdir() .expect("Failed to create temporary path for the _path_for_rocksdb_storage_with_column_family_with_ts."); let path = tempdir.path(); let _ = DB::destroy(&Options::default(), path); { let mut db_opts = Options::default(); db_opts.create_missing_column_families(true); db_opts.create_if_missing(true); let mut cf_opts = Options::default(); cf_opts.set_comparator_with_ts( U64Comparator::NAME, U64Timestamp::SIZE, Box::new(U64Comparator::compare), Box::new(U64Comparator::compare_ts), Box::new(U64Comparator::compare_without_ts), ); let cfs = vec![("cf", cf_opts)]; let db = DB::open_cf_with_opts(&db_opts, path, cfs).unwrap(); let cf = db.cf_handle("cf").unwrap(); let key = b"hello"; let val1 = b"world0"; let val2 = b"world1"; let ts = U64Timestamp::new(1); let ts2 = U64Timestamp::new(2); let ts3 = U64Timestamp::new(3); let mut opts = ReadOptions::default(); opts.set_timestamp(ts); // basic put and get db.put_cf_with_ts(&cf, key, ts, val1).unwrap(); let value = db.get_cf_opt(&cf, key, &opts).unwrap(); assert_eq!(value.unwrap().as_slice(), val1); // update db.put_cf_with_ts(&cf, key, ts2, val2).unwrap(); opts.set_timestamp(ts2); let value = db.get_cf_opt(&cf, key, &opts).unwrap(); assert_eq!(value.unwrap().as_slice(), val2); // delete db.delete_cf_with_ts(&cf, key, ts3).unwrap(); opts.set_timestamp(ts3); let value = db.get_cf_opt(&cf, key, &opts).unwrap(); assert!(value.is_none()); // ts2 should read deleted data opts.set_timestamp(ts2); let value = db.get_cf_opt(&cf, key, &opts).unwrap(); assert_eq!(value.unwrap().as_slice(), val2); // ts1 should read old data opts.set_timestamp(ts); let value = db.get_cf_opt(&cf, key, &opts).unwrap(); assert_eq!(value.unwrap().as_slice(), val1); // test iterator with ts opts.set_timestamp(ts2); let mut iter = db.raw_iterator_cf_opt(&cf, opts); iter.seek_to_first(); let mut result_vec = Vec::new(); while iter.valid() { let key = iter.key().unwrap(); // maybe not best way to copy? let key_str = key.iter().map(|b| *b as char).collect::>(); result_vec.push(String::from_iter(key_str)); iter.next(); } assert_eq!(result_vec, ["hello"]); // test full_history_ts_low works let mut compact_opts = CompactOptions::default(); compact_opts.set_full_history_ts_low(ts2); db.compact_range_cf_opt(&cf, None::<&[u8]>, None::<&[u8]>, &compact_opts); db.flush().unwrap(); // Attempt to read `full_history_ts_low`. // It should match the value we set earlier (`ts2`). let full_history_ts_low = db.get_full_history_ts_low(&cf).unwrap(); assert_eq!(U64Timestamp::from(full_history_ts_low.as_slice()), ts2); let mut opts = ReadOptions::default(); opts.set_timestamp(ts3); let value = db.get_cf_opt(&cf, key, &opts).unwrap(); assert_eq!(value, None); // cannot read with timestamp older than full_history_ts_low opts.set_timestamp(ts); assert!(db.get_cf_opt(&cf, key, &opts).is_err()); } let _ = DB::destroy(&Options::default(), path); } rocksdb-0.23.0/tests/test_db.rs000064400000000000000000001565641046102023000145120ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. mod util; use std::convert::TryInto; use std::sync::atomic::{AtomicUsize, Ordering}; use std::{mem, sync::Arc, thread, time::Duration}; use pretty_assertions::assert_eq; use rocksdb::statistics::{Histogram, StatsLevel, Ticker}; use rocksdb::{ perf::get_memory_usage_stats, BlockBasedOptions, BottommostLevelCompaction, Cache, ColumnFamilyDescriptor, ColumnFamilyTtl, CompactOptions, CuckooTableOptions, DBAccess, DBCompactionStyle, DBWithThreadMode, Env, Error, ErrorKind, FifoCompactOptions, IteratorMode, MultiThreaded, Options, PerfContext, PerfMetric, ReadOptions, SingleThreaded, SliceTransform, Snapshot, UniversalCompactOptions, UniversalCompactionStopStyle, WaitForCompactOptions, WriteBatch, DB, DEFAULT_COLUMN_FAMILY_NAME, }; use util::{assert_iter, pair, DBPath, U64Comparator, U64Timestamp}; #[test] fn external() { let path = DBPath::new("_rust_rocksdb_externaltest"); { let db = DB::open_default(&path).unwrap(); assert!(db.put(b"k1", b"v1111").is_ok()); let r: Result>, Error> = db.get(b"k1"); assert_eq!(r.unwrap().unwrap(), b"v1111"); assert!(db.delete(b"k1").is_ok()); assert!(db.get(b"k1").unwrap().is_none()); } } #[test] fn db_vector_as_ref_byte_slice() { let path = DBPath::new("_rust_rocksdb_db_vector_as_ref_byte_slice"); { let db = DB::open_default(&path).unwrap(); assert!(db.put(b"k1", b"v1111").is_ok()); let result = db.get(b"k1").unwrap().unwrap(); assert_eq!(get_byte_slice(&result), b"v1111"); } } fn get_byte_slice>(source: &'_ T) -> &'_ [u8] { source.as_ref() } #[test] fn errors_do_stuff() { let path = DBPath::new("_rust_rocksdb_error"); let _db = DB::open_default(&path).unwrap(); let opts = Options::default(); // The DB will still be open when we try to destroy it and the lock should fail. match DB::destroy(&opts, &path) { Err(s) => { let message = s.to_string(); assert_eq!(s.kind(), ErrorKind::IOError); assert!(message.contains("_rust_rocksdb_error")); assert!(message.contains("/LOCK:")); } Ok(_) => panic!("should fail"), } } #[test] fn writebatch_works() { let path = DBPath::new("_rust_rocksdb_writebacktest"); { let db = DB::open_default(&path).unwrap(); { // test put let mut batch = WriteBatch::default(); assert!(db.get(b"k1").unwrap().is_none()); assert_eq!(batch.len(), 0); assert!(batch.is_empty()); batch.put(b"k1", b"v1111"); batch.put(b"k2", b"v2222"); batch.put(b"k3", b"v3333"); assert_eq!(batch.len(), 3); assert!(!batch.is_empty()); assert!(db.get(b"k1").unwrap().is_none()); let p = db.write(batch); assert!(p.is_ok()); let r: Result>, Error> = db.get(b"k1"); assert_eq!(r.unwrap().unwrap(), b"v1111"); } { // test delete let mut batch = WriteBatch::default(); batch.delete(b"k1"); assert_eq!(batch.len(), 1); assert!(!batch.is_empty()); let p = db.write(batch); assert!(p.is_ok()); assert!(db.get(b"k1").unwrap().is_none()); } { // test delete_range let mut batch = WriteBatch::default(); batch.delete_range(b"k2", b"k4"); assert_eq!(batch.len(), 1); assert!(!batch.is_empty()); let p = db.write(batch); assert!(p.is_ok()); assert!(db.get(b"k2").unwrap().is_none()); assert!(db.get(b"k3").unwrap().is_none()); } { // test size_in_bytes let mut batch = WriteBatch::default(); let before = batch.size_in_bytes(); batch.put(b"k1", b"v1234567890"); let after = batch.size_in_bytes(); assert!(before + 10 <= after); } } } #[test] fn iterator_test() { let path = DBPath::new("_rust_rocksdb_iteratortest"); { let data = [(b"k1", b"v1111"), (b"k2", b"v2222"), (b"k3", b"v3333")]; let db = DB::open_default(&path).unwrap(); for (key, value) in &data { assert!(db.put(key, value).is_ok()); } let iter = db.iterator(IteratorMode::Start); for (idx, (db_key, db_value)) in iter.map(Result::unwrap).enumerate() { let (key, value) = data[idx]; assert_eq!((&key[..], &value[..]), (db_key.as_ref(), db_value.as_ref())); } } } #[test] fn iterator_test_past_end() { let path = DBPath::new("_rust_rocksdb_iteratortest_past_end"); { let db = DB::open_default(&path).unwrap(); db.put(b"k1", b"v1111").unwrap(); let mut iter = db.iterator(IteratorMode::Start); assert!(iter.next().is_some()); assert!(iter.next().is_none()); assert!(iter.next().is_none()); } } #[test] fn iterator_test_tailing() { let path = DBPath::new("_rust_rocksdb_iteratortest_tailing"); { let data = [(b"k1", b"v1"), (b"k2", b"v2"), (b"k3", b"v3")]; let mut ro = ReadOptions::default(); ro.set_tailing(true); let db = DB::open_default(&path).unwrap(); let mut data_iter = data.iter(); let (k, v) = data_iter.next().unwrap(); let r = db.put(k, v); assert!(r.is_ok()); let tail_iter = db.iterator_opt(IteratorMode::Start, ro); for (k, v) in data_iter { let r = db.put(k, v); assert!(r.is_ok()); } let mut tot = 0; for (i, (k, v)) in tail_iter.map(Result::unwrap).enumerate() { assert_eq!( (k.to_vec(), v.to_vec()), (data[i].0.to_vec(), data[i].1.to_vec()) ); tot += 1; } assert_eq!(tot, data.len()); } } #[test] fn iterator_test_upper_bound() { let path = DBPath::new("_rust_rocksdb_iteratortest_upper_bound"); { let db = DB::open_default(&path).unwrap(); db.put(b"k1", b"v1").unwrap(); db.put(b"k2", b"v2").unwrap(); db.put(b"k3", b"v3").unwrap(); db.put(b"k4", b"v4").unwrap(); db.put(b"k5", b"v5").unwrap(); let mut readopts = ReadOptions::default(); readopts.set_iterate_upper_bound(b"k4".to_vec()); assert_iter( db.iterator_opt(IteratorMode::Start, readopts), &[pair(b"k1", b"v1"), pair(b"k2", b"v2"), pair(b"k3", b"v3")], ); } } #[test] fn iterator_test_lower_bound() { let path = DBPath::new("_rust_rocksdb_iteratortest_lower_bound"); { let db = DB::open_default(&path).unwrap(); db.put(b"k1", b"v1").unwrap(); db.put(b"k2", b"v2").unwrap(); db.put(b"k3", b"v3").unwrap(); db.put(b"k4", b"v4").unwrap(); db.put(b"k5", b"v5").unwrap(); let mut readopts = ReadOptions::default(); readopts.set_iterate_lower_bound(b"k4".to_vec()); assert_iter( db.iterator_opt(IteratorMode::Start, readopts), &[pair(b"k4", b"v4"), pair(b"k5", b"v5")], ); } } #[test] fn snapshot_test() { let path = DBPath::new("_rust_rocksdb_snapshottest"); { let db = DB::open_default(&path).unwrap(); assert!(db.put(b"k1", b"v1111").is_ok()); let snap = db.snapshot(); assert_eq!(snap.get(b"k1").unwrap().unwrap(), b"v1111"); assert!(db.put(b"k2", b"v2222").is_ok()); assert!(db.get(b"k2").unwrap().is_some()); assert!(snap.get(b"k2").unwrap().is_none()); } } #[derive(Clone)] struct SnapshotWrapper { snapshot: Arc>, } impl SnapshotWrapper { fn new(db: &DB) -> Self { Self { snapshot: Arc::new(unsafe { mem::transmute(db.snapshot()) }), } } fn check(&self, key: K, value: &[u8]) -> bool where K: AsRef<[u8]>, { self.snapshot.get(key).unwrap().unwrap() == value } } #[test] fn sync_snapshot_test() { let path = DBPath::new("_rust_rocksdb_sync_snapshottest"); let db = DB::open_default(&path).unwrap(); assert!(db.put(b"k1", b"v1").is_ok()); assert!(db.put(b"k2", b"v2").is_ok()); let wrapper = SnapshotWrapper::new(&db); let wrapper_1 = wrapper.clone(); let handler_1 = thread::spawn(move || wrapper_1.check("k1", b"v1")); let handler_2 = thread::spawn(move || wrapper.check("k2", b"v2")); assert!(handler_1.join().unwrap()); assert!(handler_2.join().unwrap()); } #[test] fn set_option_test() { let path = DBPath::new("_rust_rocksdb_set_optionstest"); { let db = DB::open_default(&path).unwrap(); // set an option to valid values assert!(db .set_options(&[("disable_auto_compactions", "true")]) .is_ok()); assert!(db .set_options(&[("disable_auto_compactions", "false")]) .is_ok()); // invalid names/values should result in an error assert!(db .set_options(&[("disable_auto_compactions", "INVALID_VALUE")]) .is_err()); assert!(db .set_options(&[("INVALID_NAME", "INVALID_VALUE")]) .is_err()); // option names/values must not contain NULLs assert!(db .set_options(&[("disable_auto_compactions", "true\0")]) .is_err()); assert!(db .set_options(&[("disable_auto_compactions\0", "true")]) .is_err()); // empty options are not allowed assert!(db.set_options(&[]).is_err()); // multiple options can be set in a single API call let multiple_options = [ ("paranoid_file_checks", "true"), ("report_bg_io_stats", "true"), ]; db.set_options(&multiple_options).unwrap(); } } #[test] fn set_option_cf_test() { let path = DBPath::new("_rust_rocksdb_set_options_cftest"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let db = DB::open_cf(&opts, &path, vec!["cf1"]).unwrap(); let cf = db.cf_handle("cf1").unwrap(); // set an option to valid values assert!(db .set_options_cf(&cf, &[("disable_auto_compactions", "true")]) .is_ok()); assert!(db .set_options_cf(&cf, &[("disable_auto_compactions", "false")]) .is_ok()); // invalid names/values should result in an error assert!(db .set_options_cf(&cf, &[("disable_auto_compactions", "INVALID_VALUE")]) .is_err()); assert!(db .set_options_cf(&cf, &[("INVALID_NAME", "INVALID_VALUE")]) .is_err()); // option names/values must not contain NULLs assert!(db .set_options_cf(&cf, &[("disable_auto_compactions", "true\0")]) .is_err()); assert!(db .set_options_cf(&cf, &[("disable_auto_compactions\0", "true")]) .is_err()); // empty options are not allowed assert!(db.set_options_cf(&cf, &[]).is_err()); // multiple options can be set in a single API call let multiple_options = [ ("paranoid_file_checks", "true"), ("report_bg_io_stats", "true"), ]; db.set_options(&multiple_options).unwrap(); } } #[test] fn get_statistics_test() { let path = DBPath::new("_rust_rocksdb_get_statisticstest"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); opts.enable_statistics(); opts.set_statistics_level(StatsLevel::All); let db = DB::open_cf(&opts, &path, vec!["cf1"]).unwrap(); let cf = db.cf_handle("cf1").unwrap(); let initial_bytes_written = opts.get_ticker_count(Ticker::BytesWritten); db.put_cf(&cf, b"key1", b"value").unwrap(); db.put_cf(&cf, b"key2", b"value").unwrap(); db.put_cf(&cf, b"key3", b"value").unwrap(); db.flush_cf(&cf).unwrap(); assert!(opts.get_ticker_count(Ticker::BytesWritten) > 0); // We should see some counters increased assert!(opts.get_ticker_count(Ticker::BytesWritten) > initial_bytes_written); let histogram_data = opts.get_histogram_data(Histogram::DbWrite); assert!(histogram_data.count() > 0); assert!(histogram_data.max().is_normal()); } } #[test] fn set_column_family_metadata_test() { let path = DBPath::new("_set_column_family_metadata_test"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let db = DB::open_cf(&opts, &path, vec![DEFAULT_COLUMN_FAMILY_NAME, "cf2"]).unwrap(); let cf1 = db.cf_handle(DEFAULT_COLUMN_FAMILY_NAME).unwrap(); db.put_cf(&cf1, b"key1", b"value").unwrap(); let cf2 = db.cf_handle("cf2").unwrap(); db.put_cf(&cf2, b"key1", b"value").unwrap(); db.put_cf(&cf2, b"key2", b"value").unwrap(); db.put_cf(&cf2, b"key3", b"value").unwrap(); db.flush_cf(&cf1).unwrap(); db.flush_cf(&cf2).unwrap(); let default_cf_metadata = db.get_column_family_metadata(); assert_eq!(default_cf_metadata.size > 150, true); assert_eq!(default_cf_metadata.file_count, 1); let cf2_metadata = db.get_column_family_metadata_cf(&cf2); assert_eq!(cf2_metadata.size > default_cf_metadata.size, true); assert_eq!(cf2_metadata.file_count, 1); } } #[test] fn test_sequence_number() { let path = DBPath::new("_rust_rocksdb_test_sequence_number"); { let db = DB::open_default(&path).unwrap(); assert_eq!(db.latest_sequence_number(), 0); let _ = db.put(b"key", b"value"); assert_eq!(db.latest_sequence_number(), 1); } } struct OperationCounts { puts: usize, deletes: usize, } impl rocksdb::WriteBatchIterator for OperationCounts { fn put(&mut self, _key: Box<[u8]>, _value: Box<[u8]>) { self.puts += 1; } fn delete(&mut self, _key: Box<[u8]>) { self.deletes += 1; } } #[test] fn test_get_updates_since_empty() { let path = DBPath::new("_rust_rocksdb_test_get_updates_since_empty"); let db = DB::open_default(&path).unwrap(); // get_updates_since() on an empty database let mut iter = db.get_updates_since(0).unwrap(); assert!(iter.next().is_none()); } #[test] fn test_get_updates_since_start() { let path = DBPath::new("_rust_rocksdb_test_test_get_updates_since_start"); let db = DB::open_default(&path).unwrap(); // add some records and collect sequence numbers, // verify 4 batches of 1 put each were done let seq0 = db.latest_sequence_number(); db.put(b"key1", b"value1").unwrap(); db.put(b"key2", b"value2").unwrap(); db.put(b"key3", b"value3").unwrap(); db.put(b"key4", b"value4").unwrap(); let mut iter = db.get_updates_since(seq0).unwrap(); let mut counts = OperationCounts { puts: 0, deletes: 0, }; let (seq, batch) = iter.next().unwrap().unwrap(); assert_eq!(seq, 1); batch.iterate(&mut counts); let (seq, batch) = iter.next().unwrap().unwrap(); assert_eq!(seq, 2); batch.iterate(&mut counts); let (seq, batch) = iter.next().unwrap().unwrap(); assert_eq!(seq, 3); batch.iterate(&mut counts); let (seq, batch) = iter.next().unwrap().unwrap(); assert_eq!(seq, 4); batch.iterate(&mut counts); assert!(iter.next().is_none()); assert_eq!(counts.puts, 4); assert_eq!(counts.deletes, 0); } #[test] fn test_get_updates_since_multiple_batches() { let path = DBPath::new("_rust_rocksdb_test_get_updates_since_multiple_batches"); let db = DB::open_default(&path).unwrap(); // add some records and collect sequence numbers, // verify 3 batches of 1 put each were done db.put(b"key1", b"value1").unwrap(); let seq1 = db.latest_sequence_number(); db.put(b"key2", b"value2").unwrap(); db.put(b"key3", b"value3").unwrap(); db.put(b"key4", b"value4").unwrap(); let mut iter = db.get_updates_since(seq1).unwrap(); let mut counts = OperationCounts { puts: 0, deletes: 0, }; let (seq, batch) = iter.next().unwrap().unwrap(); assert_eq!(seq, 2); batch.iterate(&mut counts); let (seq, batch) = iter.next().unwrap().unwrap(); assert_eq!(seq, 3); batch.iterate(&mut counts); let (seq, batch) = iter.next().unwrap().unwrap(); assert_eq!(seq, 4); batch.iterate(&mut counts); assert!(iter.next().is_none()); assert_eq!(counts.puts, 3); assert_eq!(counts.deletes, 0); } #[test] fn test_get_updates_since_one_batch() { let batches = [WriteBatch::default(), WriteBatch::with_capacity_bytes(13)]; for mut batch in batches { let path = DBPath::new("_rust_rocksdb_test_get_updates_since_one_batch"); let db = DB::open_default(&path).unwrap(); db.put(b"key2", b"value2").unwrap(); // some puts and deletes in a single batch, // verify 1 put and 1 delete were done let seq1 = db.latest_sequence_number(); assert_eq!(seq1, 1); batch.put(b"key1", b"value1"); batch.delete(b"key2"); db.write(batch).unwrap(); assert_eq!(db.latest_sequence_number(), 3); let mut iter = db.get_updates_since(seq1).unwrap(); let mut counts = OperationCounts { puts: 0, deletes: 0, }; let (seq, batch) = iter.next().unwrap().unwrap(); assert_eq!(seq, 2); batch.iterate(&mut counts); assert!(iter.next().is_none()); assert_eq!(counts.puts, 1); assert_eq!(counts.deletes, 1); } } #[test] fn test_get_updates_since_batches() { let path = DBPath::new("_rust_rocksdb_test_get_updates_since_one_batch"); let db = DB::open_default(&path).unwrap(); db.put(b"key2", b"value2").unwrap(); assert_eq!(db.latest_sequence_number(), 1); let mut batch = WriteBatch::default(); batch.put(b"key1", b"value1"); batch.delete(b"key2"); db.write(batch).unwrap(); let seq2 = db.latest_sequence_number(); assert_eq!(seq2, 3); let mut batch = WriteBatch::default(); batch.put(b"key3", b"value1"); batch.put(b"key4", b"value1"); db.write(batch).unwrap(); assert_eq!(db.latest_sequence_number(), 5); let mut iter = db.get_updates_since(seq2).unwrap(); let mut counts = OperationCounts { puts: 0, deletes: 0, }; // Verify we get the 2nd batch with 2 puts back and not the first let (seq, batch) = iter.next().unwrap().unwrap(); assert_eq!(seq, 4); batch.iterate(&mut counts); assert!(iter.next().is_none()); assert_eq!(counts.puts, 2); assert_eq!(counts.deletes, 0); } #[test] fn test_get_updates_since_nothing() { let path = DBPath::new("_rust_rocksdb_test_get_updates_since_nothing"); let db = DB::open_default(&path).unwrap(); // get_updates_since() with no new changes db.put(b"key1", b"value1").unwrap(); let seq1 = db.latest_sequence_number(); let mut iter = db.get_updates_since(seq1).unwrap(); assert!(iter.next().is_none()); } #[test] fn test_get_updates_since_out_of_range() { let path = DBPath::new("_rust_rocksdb_test_get_updates_since_out_of_range"); let db = DB::open_default(&path).unwrap(); db.put(b"key1", b"value1").unwrap(); // get_updates_since() with an out of bounds sequence number let result = db.get_updates_since(1000); assert!(result.is_err()); } #[test] fn test_open_as_secondary() { let primary_path = DBPath::new("_rust_rocksdb_test_open_as_secondary_primary"); let db = DB::open_default(&primary_path).unwrap(); db.put(b"key1", b"value1").unwrap(); let mut opts = Options::default(); opts.set_max_open_files(-1); let secondary_path = DBPath::new("_rust_rocksdb_test_open_as_secondary_secondary"); let secondary = DB::open_as_secondary(&opts, &primary_path, &secondary_path).unwrap(); let result = secondary.get(b"key1").unwrap().unwrap(); assert_eq!(get_byte_slice(&result), b"value1"); db.put(b"key1", b"value2").unwrap(); assert!(secondary.try_catch_up_with_primary().is_ok()); let result = secondary.get(b"key1").unwrap().unwrap(); assert_eq!(get_byte_slice(&result), b"value2"); } #[test] fn test_open_cf_descriptors_as_secondary() { let primary_path = DBPath::new("_rust_rocksdb_test_open_cf_descriptors_as_secondary_primary"); let mut primary_opts = Options::default(); primary_opts.create_if_missing(true); primary_opts.create_missing_column_families(true); let cfs = vec!["cf1"]; let primary_db = DB::open_cf(&primary_opts, &primary_path, &cfs).unwrap(); let primary_cf1 = primary_db.cf_handle("cf1").unwrap(); primary_db.put_cf(&primary_cf1, b"k1", b"v1").unwrap(); let secondary_path = DBPath::new("_rust_rocksdb_test_open_cf_descriptors_as_secondary_secondary"); let mut secondary_opts = Options::default(); secondary_opts.set_max_open_files(-1); let cfs = cfs .into_iter() .map(|name| ColumnFamilyDescriptor::new(name, Options::default())); let secondary_db = DB::open_cf_descriptors_as_secondary(&secondary_opts, &primary_path, &secondary_path, cfs) .unwrap(); let secondary_cf1 = secondary_db.cf_handle("cf1").unwrap(); assert_eq!( secondary_db.get_cf(&secondary_cf1, b"k1").unwrap().unwrap(), b"v1" ); assert!(secondary_db.put_cf(&secondary_cf1, b"k2", b"v2").is_err()); primary_db.put_cf(&primary_cf1, b"k1", b"v2").unwrap(); assert_eq!( secondary_db.get_cf(&secondary_cf1, b"k1").unwrap().unwrap(), b"v1" ); assert!(secondary_db.try_catch_up_with_primary().is_ok()); assert_eq!( secondary_db.get_cf(&secondary_cf1, b"k1").unwrap().unwrap(), b"v2" ); } #[test] fn test_open_with_ttl() { let path = DBPath::new("_rust_rocksdb_test_open_with_ttl"); let mut opts = Options::default(); opts.create_if_missing(true); let db = DB::open_with_ttl(&opts, &path, Duration::from_secs(1)).unwrap(); db.put(b"key1", b"value1").unwrap(); thread::sleep(Duration::from_secs(2)); // Trigger a manual compaction, this will check the TTL filter // in the database and drop all expired entries. db.compact_range(None::<&[u8]>, None::<&[u8]>); assert!(db.get(b"key1").unwrap().is_none()); } #[test] fn test_ttl_mix() { let path = DBPath::new("_rust_rocksdb_test_open_with_ttl_mix"); let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let cf1 = ColumnFamilyDescriptor::new_with_ttl( "ttl_1", Options::default(), ColumnFamilyTtl::Duration(Duration::from_secs(1)), ); let no_ttl = ColumnFamilyDescriptor::new_with_ttl( "no_ttl", Options::default(), ColumnFamilyTtl::Disabled, ); let db = DB::open_cf_descriptors_with_ttl(&opts, &path, [cf1, no_ttl], Duration::from_secs(1)) .unwrap(); db.put(b"key1", b"value1").unwrap(); let cf1 = db.cf_handle("ttl_1").unwrap(); let no_ttl = db.cf_handle("no_ttl").unwrap(); db.put_cf(&cf1, b"key2", b"value2").unwrap(); db.put_cf(&no_ttl, b"key3", b"value3").unwrap(); thread::sleep(Duration::from_secs(2)); // Trigger a manual compaction, this will check the TTL filter // in the database and drop all expired entries. db.compact_range(None::<&[u8]>, None::<&[u8]>); db.compact_range_cf(&cf1, None::<&[u8]>, None::<&[u8]>); db.compact_range_cf(&no_ttl, None::<&[u8]>, None::<&[u8]>); assert!(db.get(b"key1").unwrap().is_none()); assert!(db.get_cf(&cf1, b"key2").unwrap().is_none()); assert!(db.get_cf(&no_ttl, b"key3").unwrap().is_some()); } #[test] fn test_open_cf_with_ttl() { let path = DBPath::new("_rust_rocksdb_test_open_cf_with_ttl"); let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let db = DB::open_cf_with_ttl(&opts, &path, ["test_cf"], Duration::from_secs(1)).unwrap(); let cf = db.cf_handle("test_cf").unwrap(); db.put_cf(&cf, b"key1", b"value1").unwrap(); thread::sleep(Duration::from_secs(2)); // Trigger a manual compaction, this will check the TTL filter // in the database and drop all expired entries. db.compact_range_cf(&cf, None::<&[u8]>, None::<&[u8]>); assert!(db.get_cf(&cf, b"key1").unwrap().is_none()); } #[test] fn test_open_as_single_threaded() { let primary_path = DBPath::new("_rust_rocksdb_test_open_as_single_threaded"); let mut db = DBWithThreadMode::::open_default(&primary_path).unwrap(); let db_ref1 = &mut db; let opts = Options::default(); db_ref1.create_cf("cf1", &opts).unwrap(); } #[test] fn test_open_with_multiple_refs_as_multi_threaded() { // This tests multiple references can be allowed while creating column families let primary_path = DBPath::new("_rust_rocksdb_test_open_as_multi_threaded"); let db = DBWithThreadMode::::open_default(&primary_path).unwrap(); let db_ref1 = &db; let db_ref2 = &db; let opts = Options::default(); db_ref1.create_cf("cf1", &opts).unwrap(); db_ref2.create_cf("cf2", &opts).unwrap(); } #[test] fn test_open_with_multiple_refs_as_single_threaded() { // This tests multiple references CANNOT be allowed while creating column families let t = trybuild::TestCases::new(); t.compile_fail("tests/fail/open_with_multiple_refs_as_single_threaded.rs"); } #[test] fn test_open_utf8_path() { let path = DBPath::new("_rust_rocksdb_utf8_path_temporärer_Ordner"); { let db = DB::open_default(&path).unwrap(); assert!(db.put(b"k1", b"v1111").is_ok()); let r: Result>, Error> = db.get(b"k1"); assert_eq!(r.unwrap().unwrap(), b"v1111"); assert!(db.delete(b"k1").is_ok()); assert!(db.get(b"k1").unwrap().is_none()); } } #[test] fn compact_range_test() { let path = DBPath::new("_rust_rocksdb_compact_range_test"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); // set compaction style { let mut uni_co_opts = UniversalCompactOptions::default(); uni_co_opts.set_size_ratio(2); uni_co_opts.set_stop_style(UniversalCompactionStopStyle::Total); opts.set_compaction_style(DBCompactionStyle::Universal); opts.set_universal_compaction_options(&uni_co_opts); } // set compaction options let mut compact_opts = CompactOptions::default(); compact_opts.set_exclusive_manual_compaction(true); compact_opts.set_target_level(1); compact_opts.set_change_level(true); compact_opts.set_bottommost_level_compaction(BottommostLevelCompaction::ForceOptimized); // put and compact column family cf1 let cfs = vec!["cf1"]; let db = DB::open_cf(&opts, &path, cfs).unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); db.put_cf(&cf1, b"k1", b"v1").unwrap(); db.put_cf(&cf1, b"k2", b"v2").unwrap(); db.put_cf(&cf1, b"k3", b"v3").unwrap(); db.put_cf(&cf1, b"k4", b"v4").unwrap(); db.put_cf(&cf1, b"k5", b"v5").unwrap(); db.compact_range_cf(&cf1, Some(b"k2"), Some(b"k4")); db.compact_range_cf_opt(&cf1, Some(b"k1"), None::<&str>, &compact_opts); // put and compact default column family db.put(b"k1", b"v1").unwrap(); db.put(b"k2", b"v2").unwrap(); db.put(b"k3", b"v3").unwrap(); db.put(b"k4", b"v4").unwrap(); db.put(b"k5", b"v5").unwrap(); db.compact_range(Some(b"k3"), None::<&str>); db.compact_range_opt(None::<&str>, Some(b"k5"), &compact_opts); } } #[test] fn fifo_compaction_test() { let path = DBPath::new("_rust_rocksdb_fifo_compaction_test"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); opts.set_level_compaction_dynamic_level_bytes(false); // set compaction style { let mut fifo_co_opts = FifoCompactOptions::default(); fifo_co_opts.set_max_table_files_size(4 << 10); // 4KB opts.set_compaction_style(DBCompactionStyle::Fifo); opts.set_fifo_compaction_options(&fifo_co_opts); } // put and compact column family cf1 let cfs = vec!["cf1"]; let db = DB::open_cf(&opts, &path, cfs).unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); db.put_cf(&cf1, b"k1", b"v1").unwrap(); db.put_cf(&cf1, b"k2", b"v2").unwrap(); db.put_cf(&cf1, b"k3", b"v3").unwrap(); db.put_cf(&cf1, b"k4", b"v4").unwrap(); db.put_cf(&cf1, b"k5", b"v5").unwrap(); db.compact_range_cf(&cf1, Some(b"k2"), Some(b"k4")); // check stats let ctx = PerfContext::default(); let block_cache_hit_count = ctx.metric(PerfMetric::BlockCacheHitCount); if block_cache_hit_count > 0 { let expect = format!("block_cache_hit_count = {block_cache_hit_count}"); assert!(ctx.report(true).contains(&expect)); } // check live files (sst files meta) let livefiles = db.live_files().unwrap(); assert_eq!(livefiles.len(), 1); livefiles.iter().for_each(|f| { assert_eq!(f.level, 6); assert_eq!(f.column_family_name, "cf1"); assert!(!f.name.is_empty()); assert_eq!(f.start_key.as_ref().unwrap().as_slice(), "k1".as_bytes()); assert_eq!(f.end_key.as_ref().unwrap().as_slice(), "k5".as_bytes()); assert_eq!(f.num_entries, 5); assert_eq!(f.num_deletions, 0); }); } } #[test] fn wait_for_compact_test() { let path = DBPath::new("_rust_rocksdb_wait_for_compact_test"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); // set wait for compact options let mut wait_for_compact_opts: WaitForCompactOptions = WaitForCompactOptions::default(); wait_for_compact_opts.set_abort_on_pause(false); wait_for_compact_opts.set_flush(true); let cfs = vec!["cf1"]; let db = DB::open_cf(&opts, &path, cfs).unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); db.put_cf(&cf1, b"k1", b"v1").unwrap(); db.put_cf(&cf1, b"k2", b"v2").unwrap(); db.put_cf(&cf1, b"k3", b"v3").unwrap(); db.put_cf(&cf1, b"k4", b"v4").unwrap(); db.put_cf(&cf1, b"k5", b"v5").unwrap(); db.put(b"k1", b"v1").unwrap(); db.put(b"k2", b"v2").unwrap(); db.put(b"k3", b"v3").unwrap(); db.put(b"k4", b"v4").unwrap(); db.put(b"k5", b"v5").unwrap(); db.wait_for_compact(&wait_for_compact_opts).unwrap() } } #[test] fn env_and_dbpaths_test() { let path = DBPath::new("_rust_rocksdb_dbpath_test"); let path1 = DBPath::new("_rust_rocksdb_dbpath_test_1"); let path2 = DBPath::new("_rust_rocksdb_dbpath_test_2"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); { let mut env = Env::new().unwrap(); env.lower_high_priority_thread_pool_cpu_priority(); opts.set_env(&env); } { let paths = vec![ rocksdb::DBPath::new(&path1, 20 << 20).unwrap(), rocksdb::DBPath::new(&path2, 30 << 20).unwrap(), ]; opts.set_db_paths(&paths); } let db = DB::open(&opts, &path).unwrap(); db.put(b"k1", b"v1").unwrap(); assert_eq!(db.get(b"k1").unwrap().unwrap(), b"v1"); } } #[test] fn prefix_extract_and_iterate_test() { let path = DBPath::new("_rust_rocksdb_prefix_extract_and_iterate"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); opts.set_prefix_extractor(SliceTransform::create_fixed_prefix(2)); let db = DB::open(&opts, &path).unwrap(); db.put(b"p1_k1", b"v1").unwrap(); db.put(b"p2_k2", b"v2").unwrap(); db.put(b"p1_k3", b"v3").unwrap(); db.put(b"p1_k4", b"v4").unwrap(); db.put(b"p2_k5", b"v5").unwrap(); let mut readopts = ReadOptions::default(); readopts.set_prefix_same_as_start(true); readopts.set_iterate_lower_bound(b"p1".to_vec()); readopts.set_pin_data(true); assert_iter( db.iterator_opt(IteratorMode::Start, readopts), &[ pair(b"p1_k1", b"v1"), pair(b"p1_k3", b"v3"), pair(b"p1_k4", b"v4"), ], ); } } #[test] fn get_with_cache_and_bulkload_test() { let batches = [ WriteBatch::default(), WriteBatch::with_capacity_bytes(13), WriteBatch::with_capacity_bytes(100_000), ]; for mut batch in batches { let path = DBPath::new("_rust_rocksdb_get_with_cache_and_bulkload_test"); let log_path = DBPath::new("_rust_rocksdb_log_path_test"); // create options let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); opts.set_wal_bytes_per_sync(8 << 10); // 8KB opts.set_writable_file_max_buffer_size(512 << 10); // 512KB opts.set_enable_write_thread_adaptive_yield(true); opts.set_unordered_write(true); opts.set_max_subcompactions(2); opts.set_max_background_jobs(4); opts.set_use_adaptive_mutex(true); opts.set_db_log_dir(&log_path); opts.set_memtable_whole_key_filtering(true); opts.set_dump_malloc_stats(true); opts.set_level_compaction_dynamic_level_bytes(false); // trigger all sst files in L1/2 instead of L0 opts.set_max_bytes_for_level_base(64 << 10); // 64KB { // set block based table and cache let cache = Cache::new_lru_cache(512 << 10); assert_eq!(cache.get_usage(), 0); let mut block_based_opts = BlockBasedOptions::default(); block_based_opts.set_block_cache(&cache); block_based_opts.set_cache_index_and_filter_blocks(true); opts.set_block_based_table_factory(&block_based_opts); // open db let db = DB::open(&opts, &path).unwrap(); // write a lot for i in 0..10_000 { batch.put(format!("{i:0>4}").as_bytes(), b"v"); } assert!(db.write(batch).is_ok()); // flush memory table to sst and manual compaction assert!(db.flush().is_ok()); db.compact_range(Some(format!("{:0>4}", 0).as_bytes()), None::>); // get -> trigger caching let _ = db.get(b"1"); assert!(cache.get_usage() > 0); // get approximated memory usage let mem_usage = get_memory_usage_stats(Some(&[&db]), None).unwrap(); assert!(mem_usage.mem_table_total > 0); // get approximated cache usage let mem_usage = get_memory_usage_stats(None, Some(&[&cache])).unwrap(); assert!(mem_usage.cache_total > 0); } // bulk loading { // open db let db = DB::open(&opts, &path).unwrap(); // try to get key let iter = db.iterator(IteratorMode::Start); for (expected, (k, _)) in iter.map(Result::unwrap).enumerate() { assert_eq!(k.as_ref(), format!("{expected:0>4}").as_bytes()); } // check live files (sst files meta) let livefiles = db.live_files().unwrap(); assert_eq!(livefiles.len(), 1); livefiles.iter().for_each(|f| { assert_eq!(f.level, 2); assert_eq!(f.column_family_name, "default"); assert!(!f.name.is_empty()); assert_eq!( f.start_key.as_ref().unwrap().as_slice(), format!("{:0>4}", 0).as_bytes() ); assert_eq!( f.end_key.as_ref().unwrap().as_slice(), format!("{:0>4}", 9999).as_bytes() ); assert_eq!(f.num_entries, 10000); assert_eq!(f.num_deletions, 0); }); // delete sst file in range (except L0) assert!(db .delete_file_in_range( format!("{:0>4}", 0).as_bytes(), format!("{:0>4}", 9999).as_bytes() ) .is_ok()); let livefiles = db.live_files().unwrap(); assert_eq!(livefiles.len(), 0); // try to get a deleted key assert!(db.get(format!("{:0>4}", 123).as_bytes()).unwrap().is_none()); } // raise error when db exists { opts.set_error_if_exists(true); assert!(DB::open(&opts, &path).is_err()); } // disable all threads { // create new options let mut opts = Options::default(); opts.set_max_background_jobs(0); opts.set_stats_dump_period_sec(0); opts.set_stats_persist_period_sec(0); // test Env::Default()->SetBackgroundThreads(0, Env::Priority::BOTTOM); let mut env = Env::new().unwrap(); env.set_bottom_priority_background_threads(0); opts.set_env(&env); // open db let db = DB::open(&opts, &path).unwrap(); // try to get key let iter = db.iterator(IteratorMode::Start); for (expected, (k, _)) in iter.map(Result::unwrap).enumerate() { assert_eq!(k.as_ref(), format!("{expected:0>4}").as_bytes()); } } } } #[test] fn get_with_cache_and_bulkload_and_blobs_test() { let path = DBPath::new("_rust_rocksdb_get_with_cache_and_bulkload_and_blobs_test"); let log_path = DBPath::new("_rust_rocksdb_log_path_test"); // create options let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); opts.set_wal_bytes_per_sync(8 << 10); // 8KB opts.set_writable_file_max_buffer_size(512 << 10); // 512KB opts.set_enable_write_thread_adaptive_yield(true); opts.set_unordered_write(true); opts.set_max_subcompactions(2); opts.set_max_background_jobs(4); opts.set_use_adaptive_mutex(true); opts.set_db_log_dir(&log_path); opts.set_memtable_whole_key_filtering(true); opts.set_dump_malloc_stats(true); opts.set_enable_blob_files(true); opts.set_min_blob_size(256); // set small to ensure it is actually used opts.set_level_compaction_dynamic_level_bytes(false); // trigger all sst files in L1/2 instead of L0 opts.set_max_bytes_for_level_base(64 << 10); // 64KB { // set block based table and cache let cache = Cache::new_lru_cache(512 << 10); assert_eq!(cache.get_usage(), 0); let mut block_based_opts = BlockBasedOptions::default(); block_based_opts.set_block_cache(&cache); block_based_opts.set_cache_index_and_filter_blocks(true); opts.set_block_based_table_factory(&block_based_opts); // open db let db = DB::open(&opts, &path).unwrap(); // write a lot let mut batch = WriteBatch::default(); for i in 0..10_000 { batch.put(format!("{i:0>4}").as_bytes(), b"v"); } assert!(db.write(batch).is_ok()); // flush memory table to sst and manual compaction assert!(db.flush().is_ok()); db.compact_range(Some(format!("{:0>4}", 0).as_bytes()), None::>); // get -> trigger caching let _ = db.get(b"1"); assert!(cache.get_usage() > 0); // get approximated memory usage let mem_usage = get_memory_usage_stats(Some(&[&db]), None).unwrap(); assert!(mem_usage.mem_table_total > 0); // get approximated cache usage let mem_usage = get_memory_usage_stats(None, Some(&[&cache])).unwrap(); assert!(mem_usage.cache_total > 0); } // bulk loading { // open db let db = DB::open(&opts, &path).unwrap(); // try to get key let iter = db.iterator(IteratorMode::Start); for (expected, (k, _)) in iter.map(Result::unwrap).enumerate() { assert_eq!(k.as_ref(), format!("{expected:0>4}").as_bytes()); } // check live files (sst files meta) let livefiles = db.live_files().unwrap(); assert_eq!(livefiles.len(), 1); livefiles.iter().for_each(|f| { assert_eq!(f.level, 2); assert_eq!(f.column_family_name, "default"); assert!(!f.name.is_empty()); assert_eq!( f.start_key.as_ref().unwrap().as_slice(), format!("{:0>4}", 0).as_bytes() ); assert_eq!( f.end_key.as_ref().unwrap().as_slice(), format!("{:0>4}", 9999).as_bytes() ); assert_eq!(f.num_entries, 10000); assert_eq!(f.num_deletions, 0); }); // delete sst file in range (except L0) assert!(db .delete_file_in_range( format!("{:0>4}", 0).as_bytes(), format!("{:0>4}", 9999).as_bytes() ) .is_ok()); let livefiles = db.live_files().unwrap(); assert_eq!(livefiles.len(), 0); // try to get a deleted key assert!(db.get(format!("{:0>4}", 123).as_bytes()).unwrap().is_none()); } // raise error when db exists { opts.set_error_if_exists(true); assert!(DB::open(&opts, &path).is_err()); } // disable all threads { // create new options let mut opts = Options::default(); opts.set_max_background_jobs(0); opts.set_stats_dump_period_sec(0); opts.set_stats_persist_period_sec(0); // test Env::Default()->SetBackgroundThreads(0, Env::Priority::BOTTOM); let mut env = Env::new().unwrap(); env.set_bottom_priority_background_threads(0); opts.set_env(&env); // open db let db = DB::open(&opts, &path).unwrap(); // try to get key let iter = db.iterator(IteratorMode::Start); for (expected, (k, _)) in iter.map(Result::unwrap).enumerate() { assert_eq!(k.as_ref(), format!("{expected:0>4}").as_bytes()); } } } #[test] fn test_open_for_read_only() { let path = DBPath::new("_rust_rocksdb_test_open_for_read_only"); { let db = DB::open_default(&path).unwrap(); db.put(b"k1", b"v1").unwrap(); } { let opts = Options::default(); let error_if_log_file_exist = false; let db = DB::open_for_read_only(&opts, &path, error_if_log_file_exist).unwrap(); assert_eq!(db.get(b"k1").unwrap().unwrap(), b"v1"); assert!(db.put(b"k2", b"v2").is_err()); } } #[test] fn test_open_cf_for_read_only() { let path = DBPath::new("_rust_rocksdb_test_open_cf_for_read_only"); let cfs = vec!["cf1"]; { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let db = DB::open_cf(&opts, &path, cfs.clone()).unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); db.put_cf(&cf1, b"k1", b"v1").unwrap(); } { let opts = Options::default(); let error_if_log_file_exist = false; let db = DB::open_cf_for_read_only(&opts, &path, cfs, error_if_log_file_exist).unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); assert_eq!(db.get_cf(&cf1, b"k1").unwrap().unwrap(), b"v1"); assert!(db.put_cf(&cf1, b"k2", b"v2").is_err()); } } #[test] fn test_open_cf_descriptors_for_read_only() { let path = DBPath::new("_rust_rocksdb_test_open_cf_descriptors_for_read_only"); let cfs = vec!["cf1"]; { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let db = DB::open_cf(&opts, &path, &cfs).unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); db.put_cf(&cf1, b"k1", b"v1").unwrap(); } { let opts = Options::default(); let error_if_log_file_exist = false; let cfs = cfs .into_iter() .map(|name| ColumnFamilyDescriptor::new(name, Options::default())); let db = DB::open_cf_descriptors_read_only(&opts, &path, cfs, error_if_log_file_exist).unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); assert_eq!(db.get_cf(&cf1, b"k1").unwrap().unwrap(), b"v1"); assert!(db.put_cf(&cf1, b"k2", b"v2").is_err()); } } #[test] fn delete_range_test() { let path = DBPath::new("_rust_rocksdb_delete_range_test"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let cfs = vec!["cf1"]; let db = DB::open_cf(&opts, &path, cfs).unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); db.put_cf(&cf1, b"k1", b"v1").unwrap(); db.put_cf(&cf1, b"k2", b"v2").unwrap(); db.put_cf(&cf1, b"k3", b"v3").unwrap(); db.put_cf(&cf1, b"k4", b"v4").unwrap(); db.put_cf(&cf1, b"k5", b"v5").unwrap(); db.delete_range_cf(&cf1, b"k2", b"k4").unwrap(); assert_eq!(db.get_cf(&cf1, b"k1").unwrap().unwrap(), b"v1"); assert_eq!(db.get_cf(&cf1, b"k4").unwrap().unwrap(), b"v4"); assert_eq!(db.get_cf(&cf1, b"k5").unwrap().unwrap(), b"v5"); assert!(db.get_cf(&cf1, b"k2").unwrap().is_none()); assert!(db.get_cf(&cf1, b"k3").unwrap().is_none()); } } #[test] fn multi_get() { let path = DBPath::new("_rust_rocksdb_multi_get"); { let db = DB::open_default(&path).unwrap(); let initial_snap = db.snapshot(); db.put(b"k1", b"v1").unwrap(); let k1_snap = db.snapshot(); db.put(b"k2", b"v2").unwrap(); let _ = db.multi_get([b"k0"; 40]); let assert_values = |values: Vec<_>| { assert_eq!(3, values.len()); assert_eq!(values[0], None); assert_eq!(values[1], Some(b"v1".to_vec())); assert_eq!(values[2], Some(b"v2".to_vec())); }; let values = db .multi_get([b"k0", b"k1", b"k2"]) .into_iter() .map(Result::unwrap) .collect::>(); assert_values(values); let values = DBAccess::multi_get_opt(&db, [b"k0", b"k1", b"k2"], &Default::default()) .into_iter() .map(Result::unwrap) .collect::>(); assert_values(values); let values = db .snapshot() .multi_get([b"k0", b"k1", b"k2"]) .into_iter() .map(Result::unwrap) .collect::>(); assert_values(values); let none_values = initial_snap .multi_get([b"k0", b"k1", b"k2"]) .into_iter() .map(Result::unwrap) .collect::>(); assert_eq!(none_values, vec![None; 3]); let k1_only = k1_snap .multi_get([b"k0", b"k1", b"k2"]) .into_iter() .map(Result::unwrap) .collect::>(); assert_eq!(k1_only, vec![None, Some(b"v1".to_vec()), None]); } } #[test] fn multi_get_cf() { let path = DBPath::new("_rust_rocksdb_multi_get_cf"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let db = DB::open_cf(&opts, &path, ["cf0", "cf1", "cf2"]).unwrap(); let cf0 = db.cf_handle("cf0").unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); db.put_cf(&cf1, b"k1", b"v1").unwrap(); let cf2 = db.cf_handle("cf2").unwrap(); db.put_cf(&cf2, b"k2", b"v2").unwrap(); let values = db .multi_get_cf(vec![(&cf0, b"k0"), (&cf1, b"k1"), (&cf2, b"k2")]) .into_iter() .map(Result::unwrap) .collect::>(); assert_eq!(3, values.len()); assert_eq!(values[0], None); assert_eq!(values[1], Some(b"v1".to_vec())); assert_eq!(values[2], Some(b"v2".to_vec())); } } #[test] fn batched_multi_get_cf() { let path = DBPath::new("_rust_rocksdb_batched_multi_get_cf"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let db = DB::open_cf(&opts, &path, ["cf0"]).unwrap(); let cf = db.cf_handle("cf0").unwrap(); db.put_cf(&cf, b"k1", b"v1").unwrap(); db.put_cf(&cf, b"k2", b"v2").unwrap(); let values = db .batched_multi_get_cf(&cf, vec![b"k0", b"k1", b"k2"], true) // sorted_input .into_iter() .map(Result::unwrap) .collect::>(); assert_eq!(3, values.len()); assert!(values[0].is_none()); assert!(values[1].is_some()); assert_eq!(&(values[1].as_ref().unwrap())[0..2], b"v1"); assert_eq!(&(values[2].as_ref().unwrap())[0..2], b"v2"); } } #[test] fn key_may_exist() { let path = DBPath::new("_rust_key_may_exist"); { let db = DB::open_default(&path).unwrap(); assert!(!db.key_may_exist("nonexistent")); assert!(!db.key_may_exist_opt("nonexistent", &ReadOptions::default())); } } #[test] fn key_may_exist_cf() { let path = DBPath::new("_rust_key_may_exist_cf"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let db = DB::open_cf(&opts, &path, ["cf"]).unwrap(); let cf = db.cf_handle("cf").unwrap(); assert!(!db.key_may_exist_cf(&cf, "nonexistent")); assert!(!db.key_may_exist_cf_opt(&cf, "nonexistent", &ReadOptions::default())); } } #[test] fn key_may_exist_cf_value() { let path = DBPath::new("_rust_key_may_exist_cf_value"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let db = DB::open_cf(&opts, &path, ["cf"]).unwrap(); let cf = db.cf_handle("cf").unwrap(); // put some entry into db for i in 0..10000i32 { let _ = db.put_cf(&cf, i.to_le_bytes(), i.to_le_bytes()); } // call `key_may_exist_cf_opt_value` for i in 0..10000i32 { let (may_exist, value) = db.key_may_exist_cf_opt_value(&cf, i.to_le_bytes(), &ReadOptions::default()); // all these numbers may exist assert!(may_exist); // check value correctness if let Some(value) = value { assert_eq!(i32::from_le_bytes(value.as_ref().try_into().unwrap()), i); } } } } #[test] fn test_snapshot_outlive_db() { let t = trybuild::TestCases::new(); t.compile_fail("tests/fail/snapshot_outlive_db.rs"); } #[test] fn cuckoo() { let path = DBPath::new("_rust_rocksdb_cuckoo"); { let mut opts = Options::default(); let mut factory_opts = CuckooTableOptions::default(); factory_opts.set_hash_ratio(0.8); factory_opts.set_max_search_depth(20); factory_opts.set_cuckoo_block_size(10); factory_opts.set_identity_as_first_hash(true); factory_opts.set_use_module_hash(false); opts.set_cuckoo_table_factory(&factory_opts); opts.create_if_missing(true); let db = DB::open(&opts, &path).unwrap(); db.put(b"k1", b"v1").unwrap(); db.put(b"k2", b"v2").unwrap(); let r: Result>, Error> = db.get(b"k1"); assert_eq!(r.unwrap().unwrap(), b"v1"); let r: Result>, Error> = db.get(b"k2"); assert_eq!(r.unwrap().unwrap(), b"v2"); assert!(db.delete(b"k1").is_ok()); assert!(db.get(b"k1").unwrap().is_none()); } } #[derive(Default)] struct EvilAsRef { toggle: AtomicUsize, } impl AsRef<[u8]> for EvilAsRef { fn as_ref(&self) -> &[u8] { if self.toggle.fetch_xor(1, Ordering::Relaxed) == 0 { b"" } else { b"lorem ipsum dolor sit amet" } } } #[test] fn evil_as_ref() { let path = DBPath::new("_rust_rocksdb_evil_as_ref"); let db = DB::open_default(&path).unwrap(); let evil = EvilAsRef { toggle: AtomicUsize::new(0), }; let result = &db.multi_get([evil])[0]; assert!(result.as_ref().unwrap().is_none()); } #[test] fn test_atomic_flush_cfs() { let n = DBPath::new("_rust_rocksdb_atomic_flush_cfs"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); opts.set_atomic_flush(true); let db = DB::open_cf(&opts, &n, ["cf1", "cf2"]).unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); let cf2 = db.cf_handle("cf2").unwrap(); let mut write_options = rocksdb::WriteOptions::new(); write_options.disable_wal(true); db.put_cf_opt(&cf1, "k11", "v11", &write_options).unwrap(); db.put_cf_opt(&cf2, "k21", "v21", &write_options).unwrap(); let mut opts = rocksdb::FlushOptions::new(); opts.set_wait(true); db.flush_cfs_opt(&[&cf1, &cf2], &opts).unwrap(); } { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); opts.set_atomic_flush(true); let db = DB::open_cf(&opts, &n, ["cf1", "cf2"]).unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); let cf2 = db.cf_handle("cf2").unwrap(); assert_eq!( db.get_cf(&cf1, "k11").unwrap(), Some("v11".as_bytes().to_vec()) ); assert_eq!( db.get_cf(&cf2, "k21").unwrap(), Some("v21".as_bytes().to_vec()) ); } } #[test] fn test_full_history_ts_low() { let path = DBPath::new("_rust_full_history_ts_low"); let _ = DB::destroy(&Options::default(), &path); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let mut cf_opts = Options::default(); cf_opts.set_comparator_with_ts( U64Comparator::NAME, U64Timestamp::SIZE, Box::new(U64Comparator::compare), Box::new(U64Comparator::compare_ts), Box::new(U64Comparator::compare_without_ts), ); let cfs = vec![("cf", cf_opts)]; let db = DB::open_cf_with_opts(&opts, &path, cfs).unwrap(); let cf = db.cf_handle("cf").unwrap(); let ts = U64Timestamp::new(1); db.increase_full_history_ts_low(&cf, ts).unwrap(); let ret = U64Timestamp::from(db.get_full_history_ts_low(&cf).unwrap().as_slice()); assert_eq!(ts, ret); let _ = DB::destroy(&Options::default(), &path); } } rocksdb-0.23.0/tests/test_iterator.rs000064400000000000000000000316521046102023000157440ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. mod util; use pretty_assertions::assert_eq; use rocksdb::{Direction, IteratorMode, MemtableFactory, Options, DB}; use util::{assert_iter, assert_iter_reversed, pair, DBPath}; #[test] #[allow(clippy::cognitive_complexity)] fn test_iterator() { let n = DBPath::new("_rust_rocksdb_iterator_test"); { const K1: &[u8] = b"k1"; const K2: &[u8] = b"k2"; const K3: &[u8] = b"k3"; const K4: &[u8] = b"k4"; const V1: &[u8] = b"v1111"; const V2: &[u8] = b"v2222"; const V3: &[u8] = b"v3333"; const V4: &[u8] = b"v4444"; let db = DB::open_default(&n).unwrap(); assert!(db.put(K1, V1).is_ok()); assert!(db.put(K2, V2).is_ok()); assert!(db.put(K3, V3).is_ok()); let expected = [pair(K1, V1), pair(K2, V2), pair(K3, V3)]; assert_iter(db.iterator(IteratorMode::Start), &expected); // Test that it's idempotent assert_iter(db.iterator(IteratorMode::Start), &expected); assert_iter(db.iterator(IteratorMode::Start), &expected); assert_iter(db.iterator(IteratorMode::Start), &expected); // Test it in reverse a few times assert_iter_reversed(db.iterator(IteratorMode::End), &expected); assert_iter_reversed(db.iterator(IteratorMode::End), &expected); assert_iter_reversed(db.iterator(IteratorMode::End), &expected); assert_iter_reversed(db.iterator(IteratorMode::End), &expected); // Try it forward again assert_iter(db.iterator(IteratorMode::Start), &expected); assert_iter(db.iterator(IteratorMode::Start), &expected); { let old_iterator = db.iterator(IteratorMode::Start); assert!(db.put(K4, V4).is_ok()); assert_iter(old_iterator, &expected); } let expected2 = [pair(K1, V1), pair(K2, V2), pair(K3, V3), pair(K4, V4)]; assert_iter(db.iterator(IteratorMode::Start), &expected2); assert_iter( db.iterator(IteratorMode::From(b"k2", Direction::Forward)), &expected2[1..], ); assert_iter_reversed( db.iterator(IteratorMode::From(b"k2", Direction::Reverse)), &expected[..2], ); assert_iter_reversed( db.iterator(IteratorMode::From(b"zz", Direction::Reverse)), &expected2, ); { let test = |valid, key, dir| { let mut it = db.iterator(IteratorMode::From(key, dir)); let value = it.next(); if valid { let expect = format!("{value:?}"); assert!(matches!(value, Some(Ok(_))), "{:?}", &expect); } else { assert_eq!(None, value); assert_eq!(None, it.next()); // Iterator is fused } }; test(true, b"k0", Direction::Forward); test(true, b"k1", Direction::Forward); test(true, b"k11", Direction::Forward); test(false, b"k5", Direction::Forward); test(false, b"k0", Direction::Reverse); test(true, b"k1", Direction::Reverse); test(true, b"k11", Direction::Reverse); test(true, b"k5", Direction::Reverse); } { let mut iterator1 = db.iterator(IteratorMode::From(b"k4", Direction::Forward)); iterator1.next().unwrap().unwrap(); assert_eq!(None, iterator1.next()); assert_eq!(None, iterator1.next()); } { // Check that set_mode resets the iterator let mode = IteratorMode::From(K3, Direction::Forward); let mut iterator = db.iterator(mode); assert_iter(&mut iterator, &expected2[2..]); iterator.set_mode(mode); assert_iter(&mut iterator, &expected2[2..]); } } } #[test] fn test_prefix_iterator() { let n = DBPath::new("_rust_rocksdb_prefix_iterator_test"); { const A1: &[u8] = b"aaa1"; const A2: &[u8] = b"aaa2"; const B1: &[u8] = b"bbb1"; const B2: &[u8] = b"bbb2"; let prefix_extractor = rocksdb::SliceTransform::create_fixed_prefix(3); let mut opts = Options::default(); opts.create_if_missing(true); opts.set_prefix_extractor(prefix_extractor); let db = DB::open(&opts, &n).unwrap(); assert!(db.put(A1, A1).is_ok()); assert!(db.put(A2, A2).is_ok()); assert!(db.put(B1, B1).is_ok()); assert!(db.put(B2, B2).is_ok()); assert_iter(db.prefix_iterator(b"aaa"), &[pair(A1, A1), pair(A2, A2)]); assert_iter(db.prefix_iterator(b"bbb"), &[pair(B1, B1), pair(B2, B2)]); assert_iter(db.prefix_iterator(A2), &[pair(A2, A2)]); } } #[test] fn test_prefix_iterator_uses_full_prefix() { // Test scenario derived from GitHub issue #221 // Explanation: `db.prefix_iterator` sets the underlying // options to seek to the first key that matches the *entire* // `prefix`. From there, the iterator will continue to read pairs // as long as the prefix extracted from `key` matches the // prefix extracted from `prefix`. let path = DBPath::new("_rust_rocksdb_prefix_iterator_uses_full_prefix_test"); { let data = [ ([0, 0, 0, 0], b"111"), ([0, 0, 0, 1], b"222"), ([0, 1, 0, 1], b"333"), ([0, 1, 1, 1], b"444"), ([0, 1, 2, 1], b"555"), ([0, 2, 0, 0], b"666"), ([2, 0, 0, 0], b"777"), ([2, 2, 2, 2], b"888"), ]; let prefix_extractor = rocksdb::SliceTransform::create_fixed_prefix(1); let mut opts = Options::default(); opts.create_if_missing(true); opts.set_prefix_extractor(prefix_extractor); let db = DB::open(&opts, &path).unwrap(); for (key, value) in &data { assert!(db.put(key, *value).is_ok()); } assert_iter( db.prefix_iterator([0, 1, 1]), &[ pair(&[0, 1, 1, 1], b"444"), pair(&[0, 1, 2, 1], b"555"), pair(&[0, 2, 0, 0], b"666"), ], ); } } #[test] fn test_full_iterator() { let path = DBPath::new("full_iterator_test"); { const A1: &[u8] = b"aaa1"; const A2: &[u8] = b"aaa2"; const B1: &[u8] = b"bbb1"; const B2: &[u8] = b"bbb2"; let prefix_extractor = rocksdb::SliceTransform::create_fixed_prefix(3); let factory = MemtableFactory::HashSkipList { bucket_count: 1_000_000, height: 4, branching_factor: 4, }; let mut opts = Options::default(); opts.create_if_missing(true); opts.set_prefix_extractor(prefix_extractor); opts.set_allow_concurrent_memtable_write(false); opts.set_memtable_factory(factory); let db = DB::open(&opts, &path).unwrap(); assert!(db.put(A1, A1).is_ok()); assert!(db.put(A2, A2).is_ok()); assert!(db.put(B1, B1).is_ok()); assert!(db.put(B2, B2).is_ok()); // A normal iterator won't work here since we're using a HashSkipList for our memory table // implementation (which buckets keys based on their prefix): let bad_iterator = db.iterator(IteratorMode::Start); assert_eq!(bad_iterator.collect::>(), vec![]); assert_iter( db.full_iterator(IteratorMode::Start), &[pair(A1, A1), pair(A2, A2), pair(B1, B1), pair(B2, B2)], ); } } fn custom_iter(db: &'_ DB) -> impl Iterator + '_ { db.iterator(IteratorMode::Start) .map(Result::unwrap) .map(|(_, db_value)| db_value.len()) } #[test] fn test_custom_iterator() { let path = DBPath::new("_rust_rocksdb_custom_iterator_test"); { let mut opts = Options::default(); opts.create_if_missing(true); let db = DB::open(&opts, &path).unwrap(); let _data = custom_iter(&db).collect::>(); } } #[test] fn test_iterator_outlive_db() { let t = trybuild::TestCases::new(); t.compile_fail("tests/fail/iterator_outlive_db.rs"); } #[test] fn test_iter_range() { #[rustfmt::skip] const ALL_KEYS: [&[u8]; 12] = [ /* 0 */ b"a0", /* 1 */ b"a1", /* 2 */ b"a11", /* 3 */ b"a2", /* 4 */ b"a\xff0", /* 5 */ b"a\xff1", /* 6 */ b"b0", /* 7 */ b"b1", /* 8 */ b"\xff", /* 9 */ b"\xff0", /* 10 */ b"\xff1", /* 11 */ b"\xff2", ]; let path = DBPath::new("_rust_rocksdb_iter_range_test"); let db = DB::open_default(&path).unwrap(); for key in ALL_KEYS.iter() { assert!(db.put(key, key).is_ok()); } fn test( db: &DB, mode: IteratorMode, range: impl rocksdb::IterateBounds, want: std::ops::Range, reverse: bool, ) { let mut ro = rocksdb::ReadOptions::default(); // Set bounds to test that set_iterate_range clears old bounds. ro.set_iterate_lower_bound(vec![b'z']); ro.set_iterate_upper_bound(vec![b'z']); ro.set_iterate_range(range); let got = db .iterator_opt(mode, ro) .map(Result::unwrap) .map(|(key, _value)| key) .collect::>(); let mut got = got.iter().map(Box::as_ref).collect::>(); if reverse { got.reverse(); } assert_eq!(&ALL_KEYS[want], got); } fn prefix(key: &[u8]) -> rocksdb::PrefixRange<&[u8]> { rocksdb::PrefixRange(key) } // Test Start and End modes { fn check(db: &DB, range: R, want: std::ops::Range) where R: rocksdb::IterateBounds + Clone, { test(db, IteratorMode::Start, range.clone(), want.clone(), false); test(db, IteratorMode::End, range, want, true); } check(&db, .., 0..12); check(&db, "b1".as_bytes().., 7..12); check(&db, .."b1".as_bytes(), 0..7); check(&db, "a1".as_bytes().."b1".as_bytes(), 1..7); check(&db, prefix(b""), 0..12); check(&db, prefix(b"a"), 0..6); check(&db, prefix(b"a1"), 1..3); check(&db, prefix(b"a\xff"), 4..6); check(&db, prefix(b"\xff"), 8..12); } // Test From mode with Forward direction { fn check(db: &DB, from: &[u8], range: R, want: std::ops::Range) where R: rocksdb::IterateBounds + Clone, { let mode = IteratorMode::From(from, Direction::Forward); test(db, mode, range, want, false); } check(&db, b"b0", .., 6..12); check(&db, b"b0", "a2".as_bytes().., 6..12); check(&db, b"b0", .."a1".as_bytes(), 0..0); check(&db, b"b0", .."b0".as_bytes(), 0..0); check(&db, b"b0", .."b1".as_bytes(), 6..7); check(&db, b"b0", "a1".as_bytes().."b0".as_bytes(), 0..0); check(&db, b"b0", "a1".as_bytes().."b1".as_bytes(), 6..7); check(&db, b"b0", prefix(b""), 6..12); check(&db, b"a1", prefix(b"a"), 1..6); check(&db, b"b0", prefix(b"a"), 0..0); check(&db, b"a1", prefix(b"a1"), 1..3); check(&db, b"b0", prefix(b"a1"), 0..0); check(&db, b"a1", prefix(b"a\xff"), 4..6); check(&db, b"b0", prefix(b"a\xff"), 0..0); check(&db, b"b0", prefix(b"\xff"), 8..12); } // Test From mode with Reverse direction { fn check(db: &DB, from: &[u8], range: R, want: std::ops::Range) where R: rocksdb::IterateBounds + Clone, { let mode = IteratorMode::From(from, Direction::Reverse); test(db, mode, range, want, true); } check(&db, b"b0", .., 0..7); check(&db, b"b0", "a2".as_bytes().., 3..7); check(&db, b"b0", .."a1".as_bytes(), 0..1); check(&db, b"b0", .."b0".as_bytes(), 0..6); check(&db, b"b0", .."b1".as_bytes(), 0..7); check(&db, b"b0", "a1".as_bytes().."b0".as_bytes(), 1..6); check(&db, b"b0", "a1".as_bytes().."b1".as_bytes(), 1..7); check(&db, b"b0", prefix(b""), 0..7); check(&db, b"a1", prefix(b"a"), 0..2); check(&db, b"b0", prefix(b"a"), 0..6); check(&db, b"a1", prefix(b"a1"), 1..2); check(&db, b"b0", prefix(b"a1"), 1..3); check(&db, b"a1", prefix(b"a\xff"), 0..0); check(&db, b"b0", prefix(b"a\xff"), 4..6); check(&db, b"b0", prefix(b"\xff"), 0..0); } } rocksdb-0.23.0/tests/test_merge_operator.rs000064400000000000000000000242051046102023000171210ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. mod util; use pretty_assertions::assert_eq; use rocksdb::{merge_operator::MergeFn, DBCompactionStyle, MergeOperands, Options, DB}; use serde::{Deserialize, Serialize}; use util::DBPath; fn test_provided_merge( _new_key: &[u8], existing_val: Option<&[u8]>, operands: &MergeOperands, ) -> Option> { let nops = operands.len(); let mut result: Vec = Vec::with_capacity(nops); if let Some(v) = existing_val { for e in v { result.push(*e); } } for op in operands { for e in op { result.push(*e); } } Some(result) } #[test] fn merge_test() { use crate::{Options, DB}; let db_path = DBPath::new("_rust_rocksdb_merge_test"); let mut opts = Options::default(); opts.create_if_missing(true); opts.set_merge_operator_associative("test operator", test_provided_merge); let db = DB::open(&opts, &db_path).unwrap(); let p = db.put(b"k1", b"a"); assert!(p.is_ok()); let _ = db.merge(b"k1", b"b"); let _ = db.merge(b"k1", b"c"); let _ = db.merge(b"k1", b"d"); let _ = db.merge(b"k1", b"efg"); let m = db.merge(b"k1", b"h"); assert!(m.is_ok()); match db.get(b"k1") { Ok(Some(value)) => { if let Ok(v) = std::str::from_utf8(&value) { println!("retrieved utf8 value: {v}") } else { println!("did not read valid utf-8 out of the db") } } Err(_) => println!("error reading value"), _ => panic!("value not present"), } assert!(m.is_ok()); let r = db.get(b"k1"); assert_eq!(r.unwrap().unwrap(), b"abcdefgh"); assert!(db.delete(b"k1").is_ok()); assert!(db.get(b"k1").unwrap().is_none()); } #[derive(Serialize, Deserialize, Copy, Clone, Debug, Default)] struct ValueCounts { num_a: u32, num_b: u32, num_c: u32, num_d: u32, } impl ValueCounts { fn from_slice(slice: &[u8]) -> Option { bincode::deserialize::(slice).ok() } fn as_bytes(&self) -> Option> { bincode::serialize(self).ok() } } fn test_counting_partial_merge( _new_key: &[u8], _existing_val: Option<&[u8]>, operands: &MergeOperands, ) -> Option> { let nops = operands.len(); let mut result: Vec = Vec::with_capacity(nops); for op in operands { for e in op { result.push(*e); } } Some(result) } fn test_counting_full_merge( _new_key: &[u8], existing_val: Option<&[u8]>, operands: &MergeOperands, ) -> Option> { let mut counts = existing_val .and_then(ValueCounts::from_slice) .unwrap_or_default(); for op in operands { for e in op { match *e { b'a' => counts.num_a += 1, b'b' => counts.num_b += 1, b'c' => counts.num_c += 1, b'd' => counts.num_d += 1, _ => {} } } } counts.as_bytes() } #[test] fn counting_merge_test() { use std::{sync::Arc, thread}; let db_path = DBPath::new("_rust_rocksdb_partial_merge_test"); let mut opts = Options::default(); opts.create_if_missing(true); opts.set_compaction_style(DBCompactionStyle::Universal); opts.set_min_write_buffer_number_to_merge(10); opts.set_merge_operator( "sort operator", test_counting_full_merge, test_counting_partial_merge, ); let db = Arc::new(DB::open(&opts, &db_path).unwrap()); let _ = db.delete(b"k1"); let _ = db.delete(b"k2"); let _ = db.merge(b"k1", b"a"); let _ = db.merge(b"k1", b"b"); let _ = db.merge(b"k1", b"d"); let _ = db.merge(b"k1", b"a"); let _ = db.merge(b"k1", b"a"); let _ = db.merge(b"k1", b"efg"); for i in 0..500 { let _ = db.merge(b"k2", b"c"); if i % 20 == 0 { let _ = db.get(b"k2"); } } for i in 0..500 { let _ = db.merge(b"k2", b"c"); if i % 20 == 0 { let _ = db.get(b"k2"); } } db.compact_range(None::<&[u8]>, None::<&[u8]>); let d1 = db.clone(); let d2 = db.clone(); let d3 = db.clone(); let h1 = thread::spawn(move || { for i in 0..500 { let _ = d1.merge(b"k2", b"c"); if i % 20 == 0 { let _ = d1.get(b"k2"); } } for i in 0..500 { let _ = d1.merge(b"k2", b"a"); if i % 20 == 0 { let _ = d1.get(b"k2"); } } }); let h2 = thread::spawn(move || { for i in 0..500 { let _ = d2.merge(b"k2", b"b"); if i % 20 == 0 { let _ = d2.get(b"k2"); } } for i in 0..500 { let _ = d2.merge(b"k2", b"d"); if i % 20 == 0 { let _ = d2.get(b"k2"); } } d2.compact_range(None::<&[u8]>, None::<&[u8]>); }); h2.join().unwrap(); let h3 = thread::spawn(move || { for i in 0..500 { let _ = d3.merge(b"k2", b"a"); if i % 20 == 0 { let _ = d3.get(b"k2"); } } for i in 0..500 { let _ = d3.merge(b"k2", b"c"); if i % 20 == 0 { let _ = d3.get(b"k2"); } } }); let m = db.merge(b"k1", b"b"); assert!(m.is_ok()); h3.join().unwrap(); h1.join().unwrap(); let value_getter = |key| match db.get(key) { Ok(Some(value)) => ValueCounts::from_slice(&value) .map_or_else(|| panic!("unable to create ValueCounts from bytes"), |v| v), Ok(None) => panic!("value not present"), Err(e) => panic!("error reading value {:?}", e), }; let counts = value_getter(b"k2"); assert_eq!(counts.num_a, 1000); assert_eq!(counts.num_b, 500); assert_eq!(counts.num_c, 2000); assert_eq!(counts.num_d, 500); let counts = value_getter(b"k1"); assert_eq!(counts.num_a, 3); assert_eq!(counts.num_b, 2); assert_eq!(counts.num_c, 0); assert_eq!(counts.num_d, 1); } #[test] fn failed_merge_test() { fn test_failing_merge( _key: &[u8], _val: Option<&[u8]>, _operands: &MergeOperands, ) -> Option> { None } use crate::{Options, DB}; let db_path = DBPath::new("_rust_rocksdb_failed_merge_test"); let mut opts = Options::default(); opts.create_if_missing(true); opts.set_merge_operator_associative("test operator", test_failing_merge); let db = DB::open(&opts, &db_path).expect("open with a merge operator"); db.put(b"key", b"value").expect("put_ok"); let res = db.merge(b"key", b"new value"); match res.and_then(|_e| db.get(b"key")) { Ok(val) => panic!("expected merge failure to propagate, got: {:?}", val), Err(e) => { let msg = e.into_string(); assert!( msg.contains("Merge operator failed"), "unexpected merge error message: {}", msg ); } } } fn make_merge_max_with_limit(limit: u64) -> impl MergeFn + Clone { move |_key: &[u8], first: Option<&[u8]>, rest: &MergeOperands| { let max = first .into_iter() .chain(rest) .map(|slice| { let mut bytes: [u8; 8] = Default::default(); bytes.clone_from_slice(slice); u64::from_ne_bytes(bytes) }) .fold(0, u64::max); let new_value = max.min(limit); Some(Vec::from(new_value.to_ne_bytes().as_ref())) } } #[test] fn test_merge_state() { use {Options, DB}; let tempdir = tempfile::Builder::new() .prefix("_rust_rocksdb_merge_test_state") .tempdir() .expect("Failed to create temporary path for the _rust_rocksdb_merge_test_state."); let path = tempdir.path(); let mut opts = Options::default(); opts.create_if_missing(true); opts.set_merge_operator_associative("max-limit-12", make_merge_max_with_limit(12)); { let db = DB::open(&opts, path).unwrap(); let p = db.put(b"k1", 1u64.to_ne_bytes()); assert!(p.is_ok()); let _ = db.merge(b"k1", 7u64.to_ne_bytes()); let m = db.merge(b"k1", 64u64.to_ne_bytes()); assert!(m.is_ok()); match db.get(b"k1") { Ok(Some(value)) => { let mut bytes: [u8; 8] = Default::default(); bytes.copy_from_slice(&value); assert_eq!(u64::from_ne_bytes(bytes), 12); } Err(_) => println!("error reading value"), _ => panic!("value not present"), } assert!(db.delete(b"k1").is_ok()); assert!(db.get(b"k1").unwrap().is_none()); } assert!(DB::destroy(&opts, path).is_ok()); opts.set_merge_operator_associative("max-limit-128", make_merge_max_with_limit(128)); { let db = DB::open(&opts, path).unwrap(); let p = db.put(b"k1", 1u64.to_ne_bytes()); assert!(p.is_ok()); let _ = db.merge(b"k1", 7u64.to_ne_bytes()); let m = db.merge(b"k1", 64u64.to_ne_bytes()); assert!(m.is_ok()); match db.get(b"k1") { Ok(Some(value)) => { let mut bytes: [u8; 8] = Default::default(); bytes.copy_from_slice(&value); assert_eq!(u64::from_ne_bytes(bytes), 64); } Err(_) => println!("error reading value"), _ => panic!("value not present"), } assert!(db.delete(b"k1").is_ok()); assert!(db.get(b"k1").unwrap().is_none()); } assert!(DB::destroy(&opts, path).is_ok()); } rocksdb-0.23.0/tests/test_multithreaded.rs000064400000000000000000000032201046102023000167340ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. mod util; use std::{sync::Arc, thread}; use rocksdb::DB; use util::DBPath; const N: usize = 100_000; #[test] pub fn test_multithreaded() { let n = DBPath::new("_rust_rocksdb_multithreadtest"); { let db = DB::open_default(&n).unwrap(); let db = Arc::new(db); db.put(b"key", b"value1").unwrap(); let db1 = db.clone(); let j1 = thread::spawn(move || { for _ in 1..N { db1.put(b"key", b"value1").unwrap(); } }); let db2 = db.clone(); let j2 = thread::spawn(move || { for _ in 1..N { db2.put(b"key", b"value2").unwrap(); } }); let j3 = thread::spawn(move || { for _ in 1..N { let result = match db.get(b"key") { Ok(Some(v)) => !(&v[..] != b"value1" && &v[..] != b"value2"), _ => false, }; assert!(result); } }); j1.join().unwrap(); j2.join().unwrap(); j3.join().unwrap(); } } rocksdb-0.23.0/tests/test_optimistic_transaction_db.rs000064400000000000000000000522451046102023000213520ustar 00000000000000// Copyright 2021 Yiyuan Liu // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // mod util; use rocksdb::{ CuckooTableOptions, DBAccess, Direction, Error, ErrorKind, IteratorMode, OptimisticTransactionDB, OptimisticTransactionOptions, Options, ReadOptions, SingleThreaded, SliceTransform, SnapshotWithThreadMode, WriteBatchWithTransaction, WriteOptions, DB, }; use util::DBPath; #[test] fn open_default() { let path = DBPath::new("_rust_rocksdb_optimistic_transaction_db_open_default"); { let db: OptimisticTransactionDB = OptimisticTransactionDB::open_default(&path).unwrap(); assert!(db.put(b"k1", b"v1111").is_ok()); let r: Result>, Error> = db.get(b"k1"); assert_eq!(r.unwrap().unwrap(), b"v1111"); assert!(db.delete(b"k1").is_ok()); assert!(db.get(b"k1").unwrap().is_none()); } } #[test] fn open_cf() { let path = DBPath::new("_rust_rocksdb_optimistic_transaction_db_open_cf"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let db: OptimisticTransactionDB = OptimisticTransactionDB::open_cf(&opts, &path, ["cf1", "cf2"]).unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); let cf2 = db.cf_handle("cf2").unwrap(); db.put(b"k0", b"v0").unwrap(); db.put_cf(&cf1, b"k1", b"v1").unwrap(); db.put_cf(&cf2, b"k2", b"v2").unwrap(); assert_eq!(db.get(b"k0").unwrap().unwrap(), b"v0"); assert!(db.get(b"k1").unwrap().is_none()); assert!(db.get(b"k2").unwrap().is_none()); assert!(db.get_cf(&cf1, b"k0").unwrap().is_none()); assert_eq!(db.get_cf(&cf1, b"k1").unwrap().unwrap(), b"v1"); assert!(db.get_cf(&cf1, b"k2").unwrap().is_none()); assert!(db.get_cf(&cf2, b"k0").unwrap().is_none()); assert!(db.get_cf(&cf2, b"k1").unwrap().is_none()); assert_eq!(db.get_cf(&cf2, b"k2").unwrap().unwrap(), b"v2"); } } #[test] fn multi_get() { let path = DBPath::new("_rust_rocksdb_multi_get"); { let db: OptimisticTransactionDB = OptimisticTransactionDB::open_default(&path).unwrap(); let initial_snap = db.snapshot(); db.put(b"k1", b"v1").unwrap(); let k1_snap = db.snapshot(); db.put(b"k2", b"v2").unwrap(); let _ = db.multi_get([b"k0"; 40]); let assert_values = |values: Vec<_>| { assert_eq!(3, values.len()); assert_eq!(values[0], None); assert_eq!(values[1], Some(b"v1".to_vec())); assert_eq!(values[2], Some(b"v2".to_vec())); }; let values = db .multi_get([b"k0", b"k1", b"k2"]) .into_iter() .map(Result::unwrap) .collect::>(); assert_values(values); let values = DBAccess::multi_get_opt(&db, [b"k0", b"k1", b"k2"], &Default::default()) .into_iter() .map(Result::unwrap) .collect::>(); assert_values(values); let values = db .snapshot() .multi_get([b"k0", b"k1", b"k2"]) .into_iter() .map(Result::unwrap) .collect::>(); assert_values(values); let none_values = initial_snap .multi_get([b"k0", b"k1", b"k2"]) .into_iter() .map(Result::unwrap) .collect::>(); assert_eq!(none_values, vec![None; 3]); let k1_only = k1_snap .multi_get([b"k0", b"k1", b"k2"]) .into_iter() .map(Result::unwrap) .collect::>(); assert_eq!(k1_only, vec![None, Some(b"v1".to_vec()), None]); let txn = db.transaction(); let values = txn .multi_get([b"k0", b"k1", b"k2"]) .into_iter() .map(Result::unwrap) .collect::>(); assert_values(values); } } #[test] fn multi_get_cf() { let path = DBPath::new("_rust_rocksdb_multi_get_cf"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let db: OptimisticTransactionDB = OptimisticTransactionDB::open_cf(&opts, &path, ["cf0", "cf1", "cf2"]).unwrap(); let cf0 = db.cf_handle("cf0").unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); db.put_cf(&cf1, b"k1", b"v1").unwrap(); let cf2 = db.cf_handle("cf2").unwrap(); db.put_cf(&cf2, b"k2", b"v2").unwrap(); let values = db .multi_get_cf(vec![(&cf0, b"k0"), (&cf1, b"k1"), (&cf2, b"k2")]) .into_iter() .map(Result::unwrap) .collect::>(); assert_eq!(3, values.len()); assert_eq!(values[0], None); assert_eq!(values[1], Some(b"v1".to_vec())); assert_eq!(values[2], Some(b"v2".to_vec())); let txn = db.transaction(); let values = txn .multi_get_cf(vec![(&cf0, b"k0"), (&cf1, b"k1"), (&cf2, b"k2")]) .into_iter() .map(Result::unwrap) .collect::>(); assert_eq!(3, values.len()); assert_eq!(values[0], None); assert_eq!(values[1], Some(b"v1".to_vec())); assert_eq!(values[2], Some(b"v2".to_vec())); } } #[test] fn destroy_on_open() { let path = DBPath::new("_rust_rocksdb_optimistic_transaction_db_destroy_on_open"); let _db: OptimisticTransactionDB = OptimisticTransactionDB::open_default(&path).unwrap(); let opts = Options::default(); // The TransactionDB will still be open when we try to destroy it and the lock should fail. match DB::destroy(&opts, &path) { Err(s) => { let message = s.to_string(); assert_eq!(s.kind(), ErrorKind::IOError); assert!(message.contains("_rust_rocksdb_optimistic_transaction_db_destroy_on_open")); assert!(message.contains("/LOCK:")); } Ok(_) => panic!("should fail"), } } #[test] fn writebatch() { let path = DBPath::new("_rust_rocksdb_optimistic_transaction_db_writebatch"); { let db: OptimisticTransactionDB = OptimisticTransactionDB::open_default(&path).unwrap(); { // test put let mut batch = WriteBatchWithTransaction::::default(); assert!(db.get(b"k1").unwrap().is_none()); assert_eq!(batch.len(), 0); assert!(batch.is_empty()); batch.put(b"k1", b"v1111"); batch.put(b"k2", b"v2222"); batch.put(b"k3", b"v3333"); assert_eq!(batch.len(), 3); assert!(!batch.is_empty()); assert!(db.get(b"k1").unwrap().is_none()); let p = db.write(batch); assert!(p.is_ok()); let r: Result>, Error> = db.get(b"k1"); assert_eq!(r.unwrap().unwrap(), b"v1111"); } { // test delete let mut batch = WriteBatchWithTransaction::::default(); batch.delete(b"k1"); assert_eq!(batch.len(), 1); assert!(!batch.is_empty()); let p = db.write(batch); assert!(p.is_ok()); assert!(db.get(b"k1").unwrap().is_none()); } { // test size_in_bytes let mut batch = WriteBatchWithTransaction::::default(); let before = batch.size_in_bytes(); batch.put(b"k1", b"v1234567890"); let after = batch.size_in_bytes(); assert!(before + 10 <= after); } } } #[test] fn iterator_test() { let path = DBPath::new("_rust_rocksdb_optimistic_transaction_db_iteratortest"); { let db: OptimisticTransactionDB = OptimisticTransactionDB::open_default(&path).unwrap(); let k1: Box<[u8]> = b"k1".to_vec().into_boxed_slice(); let k2: Box<[u8]> = b"k2".to_vec().into_boxed_slice(); let k3: Box<[u8]> = b"k3".to_vec().into_boxed_slice(); let k4: Box<[u8]> = b"k4".to_vec().into_boxed_slice(); let v1: Box<[u8]> = b"v1111".to_vec().into_boxed_slice(); let v2: Box<[u8]> = b"v2222".to_vec().into_boxed_slice(); let v3: Box<[u8]> = b"v3333".to_vec().into_boxed_slice(); let v4: Box<[u8]> = b"v4444".to_vec().into_boxed_slice(); db.put(&*k1, &*v1).unwrap(); db.put(&*k2, &*v2).unwrap(); db.put(&*k3, &*v3).unwrap(); let expected = vec![ (k1.clone(), v1.clone()), (k2.clone(), v2.clone()), (k3.clone(), v3.clone()), ]; let iter = db.iterator(IteratorMode::Start); assert_eq!(iter.map(Result::unwrap).collect::>(), expected); // Test that it's idempotent let iter = db.iterator(IteratorMode::Start); assert_eq!(iter.map(Result::unwrap).collect::>(), expected); let iter = db.iterator(IteratorMode::Start); assert_eq!(iter.map(Result::unwrap).collect::>(), expected); // Test in reverse let iter = db.iterator(IteratorMode::End); let mut tmp_vec = iter.collect::>(); tmp_vec.reverse(); let old_iter = db.iterator(IteratorMode::Start); db.put(&*k4, &*v4).unwrap(); let expected2 = vec![ (k1, v1), (k2, v2), (k3.clone(), v3.clone()), (k4.clone(), v4.clone()), ]; assert_eq!(old_iter.map(Result::unwrap).collect::>(), expected); let iter = db.iterator(IteratorMode::Start); assert_eq!(iter.map(Result::unwrap).collect::>(), expected2); let iter = db.iterator(IteratorMode::From(b"k3", Direction::Forward)); assert_eq!( iter.map(Result::unwrap).collect::>(), vec![(k3, v3), (k4, v4)] ); } } #[test] fn snapshot_test() { let path = DBPath::new("_rust_rocksdb_optimistic_transaction_db_snapshottest"); { let db: OptimisticTransactionDB = OptimisticTransactionDB::open_default(&path).unwrap(); assert!(db.put(b"k1", b"v1111").is_ok()); let snap = db.snapshot(); assert_eq!(snap.get(b"k1").unwrap().unwrap(), b"v1111"); assert!(db.put(b"k2", b"v2222").is_ok()); assert!(db.get(b"k2").unwrap().is_some()); assert!(snap.get(b"k2").unwrap().is_none()); } } #[test] fn prefix_extract_and_iterate_test() { let path = DBPath::new("_rust_rocksdb_optimistic_transaction_db_prefix_extract_and_iterate"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); opts.set_prefix_extractor(SliceTransform::create_fixed_prefix(2)); let db: OptimisticTransactionDB = OptimisticTransactionDB::open(&opts, &path).unwrap(); db.put(b"p1_k1", b"v1").unwrap(); db.put(b"p2_k2", b"v2").unwrap(); db.put(b"p1_k3", b"v3").unwrap(); db.put(b"p1_k4", b"v4").unwrap(); db.put(b"p2_k5", b"v5").unwrap(); let mut readopts = ReadOptions::default(); readopts.set_prefix_same_as_start(true); readopts.set_iterate_lower_bound(b"p1".to_vec()); readopts.set_pin_data(true); let iter = db.iterator_opt(IteratorMode::Start, readopts); let expected: Vec<_> = vec![(b"p1_k1", b"v1"), (b"p1_k3", b"v3"), (b"p1_k4", b"v4")] .into_iter() .map(|(k, v)| (k.to_vec().into_boxed_slice(), v.to_vec().into_boxed_slice())) .collect(); assert_eq!(expected, iter.map(Result::unwrap).collect::>()); } } #[test] fn cuckoo() { let path = DBPath::new("_rust_rocksdb_optimistic_transaction_db_cuckoo"); { let mut opts = Options::default(); let mut factory_opts = CuckooTableOptions::default(); factory_opts.set_hash_ratio(0.8); factory_opts.set_max_search_depth(20); factory_opts.set_cuckoo_block_size(10); factory_opts.set_identity_as_first_hash(true); factory_opts.set_use_module_hash(false); opts.set_cuckoo_table_factory(&factory_opts); opts.create_if_missing(true); let db: OptimisticTransactionDB = OptimisticTransactionDB::open(&opts, &path).unwrap(); db.put(b"k1", b"v1").unwrap(); db.put(b"k2", b"v2").unwrap(); let r: Result>, Error> = db.get(b"k1"); assert_eq!(r.unwrap().unwrap(), b"v1"); let r: Result>, Error> = db.get(b"k2"); assert_eq!(r.unwrap().unwrap(), b"v2"); assert!(db.delete(b"k1").is_ok()); assert!(db.get(b"k1").unwrap().is_none()); } } #[test] fn transaction() { let path = DBPath::new("_rust_rocksdb_optimistic_transaction_db_transaction"); { let mut opts = Options::default(); opts.create_if_missing(true); let db: OptimisticTransactionDB = OptimisticTransactionDB::open(&opts, &path).unwrap(); // put outside of transaction db.put(b"k1", b"v1").unwrap(); assert_eq!(db.get(b"k1").unwrap().unwrap(), b"v1"); { let txn1 = db.transaction(); txn1.put(b"k1", b"v2").unwrap(); // get outside of transaction assert_eq!(db.get(b"k1").unwrap().unwrap().as_slice(), b"v1"); // modify same key in another transaction let txn2 = db.transaction(); txn2.put(b"k1", b"v3").unwrap(); txn2.commit().unwrap(); // txn1 should fail with ErrorKind::Busy let err = txn1.commit().unwrap_err(); assert_eq!(err.kind(), ErrorKind::Busy); } { let txn1 = db.transaction(); txn1.put(b"k2", b"v2").unwrap(); let txn2 = db.transaction(); assert!(txn2.get_for_update(b"k2", true).unwrap().is_none()); // txn1 commit, txn2 should fail with Busy. txn1.commit().unwrap(); assert_eq!(txn2.commit().unwrap_err().kind(), ErrorKind::Busy); } } } #[test] fn transaction_iterator() { let path = DBPath::new("_rust_rocksdb_optimistic_transaction_db_transaction_iterator"); { let db: OptimisticTransactionDB = OptimisticTransactionDB::open_default(&path).unwrap(); let k1: Box<[u8]> = b"k1".to_vec().into_boxed_slice(); let k2: Box<[u8]> = b"k2".to_vec().into_boxed_slice(); let k3: Box<[u8]> = b"k3".to_vec().into_boxed_slice(); let k4: Box<[u8]> = b"k4".to_vec().into_boxed_slice(); let v1: Box<[u8]> = b"v1111".to_vec().into_boxed_slice(); let v2: Box<[u8]> = b"v2222".to_vec().into_boxed_slice(); let v3: Box<[u8]> = b"v3333".to_vec().into_boxed_slice(); let v4: Box<[u8]> = b"v4444".to_vec().into_boxed_slice(); db.put(&*k1, &*v1).unwrap(); db.put(&*k2, &*v2).unwrap(); db.put(&*k3, &*v3).unwrap(); let expected = vec![ (k1.clone(), v1.clone()), (k2.clone(), v2.clone()), (k3.clone(), v3.clone()), ]; let txn = db.transaction(); let iter = txn.iterator(IteratorMode::Start); assert_eq!(iter.map(Result::unwrap).collect::>(), expected); // Test that it's idempotent let iter = txn.iterator(IteratorMode::Start); assert_eq!(iter.map(Result::unwrap).collect::>(), expected); let iter = txn.iterator(IteratorMode::Start); assert_eq!(iter.map(Result::unwrap).collect::>(), expected); // Test in reverse let iter = txn.iterator(IteratorMode::End); let mut tmp_vec = iter.collect::>(); tmp_vec.reverse(); let old_iter = txn.iterator(IteratorMode::Start); txn.put(&*k4, &*v4).unwrap(); let expected2 = vec![ (k1, v1), (k2, v2), (k3.clone(), v3.clone()), (k4.clone(), v4.clone()), ]; assert_eq!(old_iter.map(Result::unwrap).collect::>(), expected); let iter = txn.iterator(IteratorMode::Start); assert_eq!(iter.map(Result::unwrap).collect::>(), expected2); let iter = txn.iterator(IteratorMode::From(b"k3", Direction::Forward)); assert_eq!( iter.map(Result::unwrap).collect::>(), vec![(k3, v3), (k4, v4)] ); } } #[test] fn transaction_rollback() { let path = DBPath::new("_rust_rocksdb_optimistic_transaction_db_transaction_rollback"); { let db: OptimisticTransactionDB = OptimisticTransactionDB::open_default(&path).unwrap(); let txn = db.transaction(); txn.rollback().unwrap(); txn.put(b"k1", b"v1").unwrap(); txn.set_savepoint(); txn.put(b"k2", b"v2").unwrap(); assert_eq!(txn.get(b"k1").unwrap().unwrap(), b"v1"); assert_eq!(txn.get(b"k2").unwrap().unwrap(), b"v2"); txn.rollback_to_savepoint().unwrap(); assert_eq!(txn.get(b"k1").unwrap().unwrap(), b"v1"); assert!(txn.get(b"k2").unwrap().is_none()); txn.rollback().unwrap(); assert!(txn.get(b"k1").unwrap().is_none()); txn.commit().unwrap(); assert!(db.get(b"k2").unwrap().is_none()); } } #[test] fn transaction_cf() { let path = DBPath::new("_rust_rocksdb_optimistic_transaction_db_transaction_cf"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let db: OptimisticTransactionDB = OptimisticTransactionDB::open_cf(&opts, &path, ["cf1", "cf2"]).unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); let cf2 = db.cf_handle("cf2").unwrap(); let txn = db.transaction(); txn.put(b"k0", b"v0").unwrap(); txn.put_cf(&cf1, b"k1", b"v1").unwrap(); txn.put_cf(&cf2, b"k2", b"v2").unwrap(); assert_eq!(txn.get(b"k0").unwrap().unwrap(), b"v0"); assert!(txn.get(b"k1").unwrap().is_none()); assert!(txn.get(b"k2").unwrap().is_none()); assert!(txn.get_cf(&cf1, b"k0").unwrap().is_none()); assert_eq!(txn.get_cf(&cf1, b"k1").unwrap().unwrap(), b"v1"); assert!(txn.get_cf(&cf1, b"k2").unwrap().is_none()); assert!(txn.get_cf(&cf2, b"k0").unwrap().is_none()); assert!(txn.get_cf(&cf2, b"k1").unwrap().is_none()); assert_eq!(txn.get_cf(&cf2, b"k2").unwrap().unwrap(), b"v2"); txn.commit().unwrap(); } } #[test] fn transaction_snapshot() { let path = DBPath::new("_rust_rocksdb_optimistic_transaction_db_transaction_snapshot"); { let db: OptimisticTransactionDB = OptimisticTransactionDB::open_default(&path).unwrap(); let txn = db.transaction(); let snapshot = txn.snapshot(); assert!(snapshot.get(b"k1").unwrap().is_none()); db.put(b"k1", b"v1").unwrap(); assert_eq!(snapshot.get(b"k1").unwrap().unwrap(), b"v1"); let mut opts = OptimisticTransactionOptions::default(); opts.set_snapshot(true); let txn = db.transaction_opt(&WriteOptions::default(), &opts); db.put(b"k2", b"v2").unwrap(); { let snapshot = SnapshotWithThreadMode::new(&txn); assert!(snapshot.get(b"k2").unwrap().is_none()); assert_eq!(txn.get(b"k2").unwrap().unwrap(), b"v2"); } txn.get_for_update(b"k2", true).unwrap(); assert_eq!(txn.commit().unwrap_err().kind(), ErrorKind::Busy); let txn = db.transaction_opt(&WriteOptions::default(), &opts); let snapshot = txn.snapshot(); txn.put(b"k3", b"v3").unwrap(); assert!(db.get(b"k3").unwrap().is_none()); // put operation should also visible to snapshot, // because this snapshot is associated with a transaction assert_eq!(snapshot.get(b"k3").unwrap().unwrap(), b"v3"); } } #[test] fn delete_range_test() { let path = DBPath::new("_rust_rocksdb_optimistic_transaction_db_delete_range_test"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let cfs = vec!["cf1"]; let db: OptimisticTransactionDB = OptimisticTransactionDB::open_cf(&opts, &path, cfs).unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); db.put_cf(&cf1, b"k1", b"v1").unwrap(); db.put_cf(&cf1, b"k2", b"v2").unwrap(); db.put_cf(&cf1, b"k3", b"v3").unwrap(); db.put_cf(&cf1, b"k4", b"v4").unwrap(); db.put_cf(&cf1, b"k5", b"v5").unwrap(); db.delete_range_cf(&cf1, b"k2", b"k4").unwrap(); assert_eq!(db.get_cf(&cf1, b"k1").unwrap().unwrap(), b"v1"); assert_eq!(db.get_cf(&cf1, b"k4").unwrap().unwrap(), b"v4"); assert_eq!(db.get_cf(&cf1, b"k5").unwrap().unwrap(), b"v5"); assert!(db.get_cf(&cf1, b"k2").unwrap().is_none()); assert!(db.get_cf(&cf1, b"k3").unwrap().is_none()); } } rocksdb-0.23.0/tests/test_optimistic_transaction_db_memory_usage.rs000064400000000000000000000044141046102023000241210ustar 00000000000000// Copyright 2021 Yiyuan Liu // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // mod util; use rocksdb::{OptimisticTransactionDB, Options, SingleThreaded}; use util::DBPath; #[test] fn test_optimistic_transaction_db_memory_usage() { let path = DBPath::new("_rust_rocksdb_optimistic_transaction_db memory_usage_test"); { let mut options = Options::default(); options.create_if_missing(true); options.enable_statistics(); // setup cache: let cache = rocksdb::Cache::new_lru_cache(1 << 20); // 1 MB cache let mut block_based_options = rocksdb::BlockBasedOptions::default(); block_based_options.set_block_cache(&cache); options.set_block_based_table_factory(&block_based_options); let db: OptimisticTransactionDB = OptimisticTransactionDB::open(&options, &path).unwrap(); let mut builder = rocksdb::perf::MemoryUsageBuilder::new().unwrap(); builder.add_db(&db); builder.add_cache(&cache); let memory_usage = builder.build().unwrap(); for i in 1..=1000 { let key = format!("key{}", i); let value = format!("value{}", i); db.put(&key, &value).unwrap(); } for i in 1..=1000 { let key = format!("key{}", i); let result = db.get(&key).unwrap().unwrap(); let result_str = String::from_utf8(result).unwrap(); assert_eq!(result_str, format!("value{}", i)); } assert_ne!(memory_usage.approximate_mem_table_total(), 0); assert_eq!(memory_usage.approximate_mem_table_readers_total(), 0); // Equals zero! assert_ne!(memory_usage.approximate_cache_total(), 0); assert_ne!(memory_usage.approximate_mem_table_unflushed(), 0); } } rocksdb-0.23.0/tests/test_pinnable_slice.rs000064400000000000000000000033441046102023000170570ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. mod util; use pretty_assertions::assert_eq; use rocksdb::{Options, DB}; use util::DBPath; #[test] fn test_pinnable_slice() { let path = DBPath::new("_rust_rocksdb_pinnable_slice_test"); let mut opts = Options::default(); opts.create_if_missing(true); let db = DB::open(&opts, &path).unwrap(); db.put(b"k1", b"value12345").unwrap(); let result = db.get_pinned(b"k1"); assert!(result.is_ok()); let value = result.unwrap(); assert!(value.is_some()); let pinnable_slice = value.unwrap(); assert_eq!(b"12345", &pinnable_slice[5..10]); } #[test] fn test_snapshot_pinnable_slice() { let path = DBPath::new("_rust_rocksdb_snapshot_pinnable_slice_test"); let mut opts = Options::default(); opts.create_if_missing(true); let db = DB::open(&opts, &path).unwrap(); db.put(b"k1", b"value12345").unwrap(); let snap = db.snapshot(); assert!(db.put(b"k1", b"value23456").is_ok()); let result = snap.get_pinned(b"k1"); assert!(result.is_ok()); let value = result.unwrap(); assert!(value.is_some()); let pinnable_slice = value.unwrap(); assert_eq!(b"12345", &pinnable_slice[5..10]); } rocksdb-0.23.0/tests/test_property.rs000064400000000000000000000057151046102023000160000ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. mod util; use pretty_assertions::assert_eq; use rocksdb::{properties, Options, DB}; use util::DBPath; #[test] fn property_test() { let n = DBPath::new("_rust_rocksdb_property_test"); { let db = DB::open_default(&n).unwrap(); let prop_name: &std::ffi::CStr = properties::STATS; let value = db.property_value(prop_name).unwrap().unwrap(); assert!(value.contains("Stats")); } { let db = DB::open_default(&n).unwrap(); let prop_name: properties::PropertyName = properties::STATS.to_owned(); let value = db.property_value(&prop_name).unwrap().unwrap(); assert!(value.contains("Stats")); } { let db = DB::open_default(&n).unwrap(); let prop_name: String = properties::STATS.to_owned().into_string(); let value = db.property_value(&prop_name).unwrap().unwrap(); assert!(value.contains("Stats")); } } #[test] fn property_cf_test() { let n = DBPath::new("_rust_rocksdb_property_cf_test"); { let opts = Options::default(); #[cfg(feature = "multi-threaded-cf")] let db = DB::open_default(&n).unwrap(); #[cfg(not(feature = "multi-threaded-cf"))] let mut db = DB::open_default(&n).unwrap(); db.create_cf("cf1", &opts).unwrap(); let cf = db.cf_handle("cf1").unwrap(); let value = db .property_value_cf(&cf, properties::STATS) .unwrap() .unwrap(); assert!(value.contains("Stats")); } } #[test] fn property_int_test() { let n = DBPath::new("_rust_rocksdb_property_int_test"); { let db = DB::open_default(&n).unwrap(); let value = db .property_int_value(properties::ESTIMATE_LIVE_DATA_SIZE) .unwrap(); assert_eq!(value, Some(0)); } } #[test] fn property_int_cf_test() { let n = DBPath::new("_rust_rocksdb_property_int_cf_test"); { let opts = Options::default(); #[cfg(feature = "multi-threaded-cf")] let db = DB::open_default(&n).unwrap(); #[cfg(not(feature = "multi-threaded-cf"))] let mut db = DB::open_default(&n).unwrap(); db.create_cf("cf1", &opts).unwrap(); let cf = db.cf_handle("cf1").unwrap(); let total_keys = db .property_int_value_cf(&cf, properties::ESTIMATE_NUM_KEYS) .unwrap(); assert_eq!(total_keys, Some(0)); } } rocksdb-0.23.0/tests/test_raw_iterator.rs000064400000000000000000000100121046102023000166000ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. mod util; use pretty_assertions::assert_eq; use rocksdb::{DBAccess, DBRawIteratorWithThreadMode, DB}; use util::DBPath; fn assert_item(iter: &DBRawIteratorWithThreadMode<'_, D>, key: &[u8], value: &[u8]) { assert!(iter.valid()); assert_eq!(iter.key(), Some(key)); assert_eq!(iter.value(), Some(value)); assert_eq!(iter.item(), Some((key, value))); } fn assert_no_item(iter: &DBRawIteratorWithThreadMode<'_, D>) { assert!(!iter.valid()); assert_eq!(iter.key(), None); assert_eq!(iter.value(), None); assert_eq!(iter.item(), None); } #[test] pub fn test_forwards_iteration() { let n = DBPath::new("forwards_iteration"); { let db = DB::open_default(&n).unwrap(); db.put(b"k1", b"v1").unwrap(); db.put(b"k2", b"v2").unwrap(); db.put(b"k3", b"v3").unwrap(); db.put(b"k4", b"v4").unwrap(); let mut iter = db.raw_iterator(); iter.seek_to_first(); assert_item(&iter, b"k1", b"v1"); iter.next(); assert_item(&iter, b"k2", b"v2"); iter.next(); // k3 iter.next(); // k4 iter.next(); // invalid! assert_no_item(&iter); } } #[test] pub fn test_seek_last() { let n = DBPath::new("backwards_iteration"); { let db = DB::open_default(&n).unwrap(); db.put(b"k1", b"v1").unwrap(); db.put(b"k2", b"v2").unwrap(); db.put(b"k3", b"v3").unwrap(); db.put(b"k4", b"v4").unwrap(); let mut iter = db.raw_iterator(); iter.seek_to_last(); assert_item(&iter, b"k4", b"v4"); iter.prev(); assert_item(&iter, b"k3", b"v3"); iter.prev(); // k2 iter.prev(); // k1 iter.prev(); // invalid! assert_no_item(&iter); } } #[test] pub fn test_seek() { let n = DBPath::new("seek"); { let db = DB::open_default(&n).unwrap(); db.put(b"k1", b"v1").unwrap(); db.put(b"k2", b"v2").unwrap(); db.put(b"k4", b"v4").unwrap(); let mut iter = db.raw_iterator(); iter.seek(b"k2"); assert_item(&iter, b"k2", b"v2"); // Check it gets the next key when the key doesn't exist iter.seek(b"k3"); assert_item(&iter, b"k4", b"v4"); } } #[test] pub fn test_seek_to_nonexistant() { let n = DBPath::new("seek_to_nonexistant"); { let db = DB::open_default(&n).unwrap(); db.put(b"k1", b"v1").unwrap(); db.put(b"k3", b"v3").unwrap(); db.put(b"k4", b"v4").unwrap(); let mut iter = db.raw_iterator(); iter.seek(b"k2"); assert_item(&iter, b"k3", b"v3"); } } #[test] pub fn test_seek_for_prev() { let n = DBPath::new("seek_for_prev"); { let db = DB::open_default(&n).unwrap(); db.put(b"k1", b"v1").unwrap(); db.put(b"k2", b"v2").unwrap(); db.put(b"k4", b"v4").unwrap(); let mut iter = db.raw_iterator(); iter.seek(b"k2"); assert_item(&iter, b"k2", b"v2"); // Check it gets the previous key when the key doesn't exist iter.seek_for_prev(b"k3"); assert_item(&iter, b"k2", b"v2"); } } #[test] pub fn test_next_without_seek() { let n = DBPath::new("test_forgot_seek"); { let db = DB::open_default(&n).unwrap(); db.put(b"k1", b"v1").unwrap(); db.put(b"k2", b"v2").unwrap(); db.put(b"k4", b"v4").unwrap(); let mut iter = db.raw_iterator(); iter.next(); } } rocksdb-0.23.0/tests/test_rocksdb_options.rs000064400000000000000000000330221046102023000173060ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. mod util; use std::{fs, io::Read as _}; use rocksdb::checkpoint::Checkpoint; use rocksdb::{ BlockBasedOptions, Cache, DBCompressionType, DataBlockIndexType, Env, LruCacheOptions, Options, ReadOptions, DB, }; use util::DBPath; #[test] fn test_load_latest() { let n = DBPath::new("_rust_rocksdb_test_load_latest"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let _ = DB::open_cf(&opts, &n, vec!["cf0", "cf1"]).unwrap(); } let (_, cfs) = Options::load_latest( &n, Env::new().unwrap(), true, Cache::new_lru_cache(1024 * 8), ) .unwrap(); assert!(cfs.iter().any(|cf| cf.name() == "default")); assert!(cfs.iter().any(|cf| cf.name() == "cf0")); assert!(cfs.iter().any(|cf| cf.name() == "cf1")); } #[test] fn test_set_num_levels() { let n = DBPath::new("_rust_rocksdb_test_set_num_levels"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.set_num_levels(2); let _db = DB::open(&opts, &n).unwrap(); } } #[test] fn test_increase_parallelism() { let n = DBPath::new("_rust_rocksdb_test_increase_parallelism"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.increase_parallelism(4); let _db = DB::open(&opts, &n).unwrap(); } } #[test] fn test_set_level_compaction_dynamic_level_bytes() { let n = DBPath::new("_rust_rocksdb_test_set_level_compaction_dynamic_level_bytes"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.set_level_compaction_dynamic_level_bytes(true); let _db = DB::open(&opts, &n).unwrap(); } } #[test] fn test_block_based_options() { let path = "_rust_rocksdb_test_block_based_options"; let n = DBPath::new(path); { let mut opts = Options::default(); opts.create_if_missing(true); let mut block_opts = BlockBasedOptions::default(); block_opts.set_cache_index_and_filter_blocks(true); block_opts.set_pin_l0_filter_and_index_blocks_in_cache(true); block_opts.set_format_version(4); block_opts.set_index_block_restart_interval(16); opts.set_block_based_table_factory(&block_opts); let _db = DB::open(&opts, &n).unwrap(); // read the setting from the LOG file let mut rocksdb_log = fs::File::open(format!("{}/LOG", (&n).as_ref().to_str().unwrap())) .expect("rocksdb creates a LOG file"); let mut settings = String::new(); rocksdb_log.read_to_string(&mut settings).unwrap(); // check the settings are set in the LOG file assert!(settings.contains("cache_index_and_filter_blocks: 1")); assert!(settings.contains("pin_l0_filter_and_index_blocks_in_cache: 1")); assert!(settings.contains("format_version: 4")); assert!(settings.contains("index_block_restart_interval: 16")); } } #[test] fn test_read_options() { let mut read_opts = ReadOptions::default(); read_opts.set_verify_checksums(false); } #[test] fn test_set_data_block_index_type() { let path = "_rust_rocksdb_test_set_data_block_index_type"; let n = DBPath::new(path); // Default is `BinarySearch` { let mut opts = Options::default(); opts.create_if_missing(true); let block_opts = BlockBasedOptions::default(); opts.set_block_based_table_factory(&block_opts); let _db = DB::open(&opts, &n).expect("open a db works"); let mut rocksdb_log = fs::File::open(format!("{}/LOG", (&n).as_ref().to_str().unwrap())) .expect("rocksdb creates a LOG file"); let mut settings = String::new(); rocksdb_log .read_to_string(&mut settings) .expect("can read the LOG file"); assert!(settings.contains("data_block_index_type: 0")); assert!(settings.contains("data_block_hash_table_util_ratio: 0.750000")); } // Setting the index type and hash table utilization ratio works { let mut opts = Options::default(); opts.create_if_missing(false); let mut block_opts = BlockBasedOptions::default(); block_opts.set_data_block_index_type(DataBlockIndexType::BinaryAndHash); block_opts.set_data_block_hash_ratio(0.35); opts.set_block_based_table_factory(&block_opts); let _db = DB::open(&opts, &n).expect("open a db works"); let mut rocksdb_log = fs::File::open(format!("{}/LOG", (&n).as_ref().to_str().unwrap())) .expect("rocksdb creates a LOG file"); let mut settings = String::new(); rocksdb_log .read_to_string(&mut settings) .expect("can read the LOG file"); assert!(settings.contains("data_block_index_type: 1")); assert!(settings.contains("data_block_hash_table_util_ratio: 0.350000")); } } #[test] #[cfg(feature = "zstd")] fn set_compression_options_zstd_max_train_bytes() { let path = DBPath::new("_rust_set_compression_options_zstd_max_train_bytes"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.set_compression_options(4, 5, 6, 7); opts.set_zstd_max_train_bytes(100); let _db = DB::open(&opts, &path).unwrap(); } } #[test] fn set_wal_compression_zstd() { let path = DBPath::new("_set_wal_compression_zstd"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.set_wal_compression_type(DBCompressionType::None); opts.set_wal_compression_type(DBCompressionType::Zstd); let _db = DB::open(&opts, &path).unwrap(); } } #[test] #[should_panic(expected = "Lz4 is not supported for WAL compression")] fn set_wal_compression_unsupported() { { let mut opts = Options::default(); opts.create_if_missing(true); opts.set_wal_compression_type(DBCompressionType::Lz4); } } fn test_compression_type(ty: DBCompressionType) { let path = DBPath::new("_test_compression_type"); let mut opts = Options::default(); opts.set_compression_type(ty); opts.create_if_missing(true); let db = DB::open(&opts, &path); let should_open = match ty { DBCompressionType::None => true, DBCompressionType::Snappy => cfg!(feature = "snappy"), DBCompressionType::Zlib => cfg!(feature = "zlib"), DBCompressionType::Bz2 => cfg!(feature = "bzip2"), DBCompressionType::Lz4 | DBCompressionType::Lz4hc => cfg!(feature = "lz4"), DBCompressionType::Zstd => cfg!(feature = "zstd"), }; if should_open { let _db = db.unwrap(); } else { let _err = db.unwrap_err(); } } #[test] fn test_none_compression() { test_compression_type(DBCompressionType::None); } #[test] fn test_snappy_compression() { test_compression_type(DBCompressionType::Snappy); } #[test] fn test_zlib_compression() { test_compression_type(DBCompressionType::Zlib); } #[test] fn test_bz2_compression() { test_compression_type(DBCompressionType::Bz2); } #[test] fn test_lz4_compression() { test_compression_type(DBCompressionType::Lz4); test_compression_type(DBCompressionType::Lz4hc); } #[test] fn test_zstd_compression() { test_compression_type(DBCompressionType::Zstd); } #[test] fn test_add_compact_on_deletion_collector_factory() { let n = DBPath::new("_rust_rocksdb_test_add_compact_on_deletion_collector_factory"); let mut opts = Options::default(); opts.create_if_missing(true); opts.add_compact_on_deletion_collector_factory(5, 10, 0.5); let _db = DB::open(&opts, &n).unwrap(); let mut rocksdb_log = fs::File::open(format!("{}/LOG", (&n).as_ref().to_str().unwrap())) .expect("rocksdb creates a LOG file"); let mut settings = String::new(); rocksdb_log .read_to_string(&mut settings) .expect("can read the LOG file"); assert!(settings.contains("CompactOnDeletionCollector (Sliding window size = 5 Deletion trigger = 10 Deletion ratio = 0.5)")); } #[test] fn test_set_avoid_unnecessary_blocking_io() { let path = DBPath::new("_set_avoid_unnecessary_blocking_io"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.set_avoid_unnecessary_blocking_io(true); let db = DB::open(&opts, &path).unwrap(); let _ = db.put(b"k1", b"a"); assert_eq!(&*db.get(b"k1").unwrap().unwrap(), b"a"); } } #[test] fn test_set_track_and_verify_wals_in_manifest() { let path = DBPath::new("_set_track_and_verify_wals_in_manifest"); // test the defaults and the setter/accessor let mut opts = Options::default(); assert!(!opts.get_track_and_verify_wals_in_manifest()); opts.set_track_and_verify_wals_in_manifest(true); assert!(opts.get_track_and_verify_wals_in_manifest()); opts.set_track_and_verify_wals_in_manifest(false); assert!(!opts.get_track_and_verify_wals_in_manifest()); // verify that a database created with this option works // TODO: Check that the MANIFEST actually contains WalAddition/WalDeletion records opts.create_if_missing(true); opts.set_track_and_verify_wals_in_manifest(true); let db = DB::open(&opts, &path).unwrap(); db.put(b"k1", b"a").expect("put must work"); assert_eq!(db.get(b"k1").unwrap().unwrap(), b"a"); } #[test] fn test_set_periodic_compaction_seconds() { let path = DBPath::new("_set_periodic_compaction_seconds"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.set_periodic_compaction_seconds(5); let _db = DB::open(&opts, &path).unwrap(); } } #[test] fn test_set_ratelimiter() { let path = DBPath::new("_set_ratelimiter"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.set_ratelimiter(1024000, 1000, 1); let db = DB::open(&opts, &path).unwrap(); let _ = db.put(b"k1", b"a"); assert_eq!(&*db.get(b"k1").unwrap().unwrap(), b"a"); } { let mut opts = Options::default(); opts.create_if_missing(true); opts.set_auto_tuned_ratelimiter(1024000, 1000, 1); let db = DB::open(&opts, &path).unwrap(); let _ = db.put(b"k2", b"a"); assert_eq!(&*db.get(b"k2").unwrap().unwrap(), b"a"); } } #[test] fn test_set_blob_cache() { let path = DBPath::new("_set_blob_cache"); let cache = Cache::new_hyper_clock_cache(1024 * 1024, 4 * 1024); let mut opts = Options::default(); opts.create_if_missing(true); opts.set_enable_blob_files(true); opts.set_min_blob_size(16); opts.set_blob_cache(&cache); let db = DB::open(&opts, &path).unwrap(); const KEY: &[u8] = b"k1"; const VALUE: &[u8] = b"01234567890123456789"; db.put(KEY, VALUE).unwrap(); // Cache miss assert_eq!(&*db.get(KEY).unwrap().unwrap(), VALUE); // Cache hit assert_eq!(&*db.get(KEY).unwrap().unwrap(), VALUE); } #[test] fn test_lru_cache_custom_opts() { let path = DBPath::new("_set_blob_cache"); let mut lru_opts = LruCacheOptions::default(); lru_opts.set_capacity(16 * 1024 * 1024); lru_opts.set_num_shard_bits(2); let cache = Cache::new_lru_cache_opts(&lru_opts); let mut opts = Options::default(); opts.create_if_missing(true); opts.set_row_cache(&cache); // Must work even if we dropped the options: test that. drop(lru_opts); let db = DB::open(&opts, &path).unwrap(); const KEY: &[u8] = b"k1"; const VALUE: &[u8] = b"01234567890123456789"; db.put(KEY, VALUE).unwrap(); // Cache miss assert_eq!(&*db.get(KEY).unwrap().unwrap(), VALUE); // Cache hit assert_eq!(&*db.get(KEY).unwrap().unwrap(), VALUE); } #[test] fn test_set_write_dbid_to_manifest() { let path = DBPath::new("_set_write_dbid_to_manifest"); // test the defaults and the setter/accessor let mut opts = Options::default(); assert!(opts.get_write_dbid_to_manifest()); opts.set_write_dbid_to_manifest(false); assert!(!opts.get_write_dbid_to_manifest()); opts.set_write_dbid_to_manifest(true); assert!(opts.get_write_dbid_to_manifest()); // verify the DBID is preserved across checkpoints. If set to false this is not true opts.create_if_missing(true); opts.set_write_dbid_to_manifest(true); let db_orig = DB::open(&opts, &path).unwrap(); let db_orig_id = db_orig.get_db_identity().unwrap(); // a checkpoint from this database has the SAME DBID if it is in the manifest let checkpoint_path = DBPath::new("set_write_dbid_checkpoint"); let checkpoint = Checkpoint::new(&db_orig).unwrap(); checkpoint.create_checkpoint(&checkpoint_path).unwrap(); let db_checkpoint = DB::open(&opts, &checkpoint_path).unwrap(); let db_checkpoint_id = db_checkpoint.get_db_identity().unwrap(); assert_eq!( db_orig_id, db_checkpoint_id, "expected database identity to be preserved across checkpoints; db_orig={} db_checkpoint={}", String::from_utf8_lossy(&db_orig_id), String::from_utf8_lossy(&db_checkpoint_id) ); } rocksdb-0.23.0/tests/test_slice_transform.rs000064400000000000000000000046031046102023000173010ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. mod util; use pretty_assertions::assert_eq; use rocksdb::{Options, SliceTransform, DB}; use util::{assert_iter, pair, DBPath}; #[test] pub fn test_slice_transform() { let db_path = DBPath::new("_rust_rocksdb_slice_transform_test"); { const A1: &[u8] = b"aaa1"; const A2: &[u8] = b"aaa2"; const B1: &[u8] = b"bbb1"; const B2: &[u8] = b"bbb2"; fn first_three(k: &[u8]) -> &[u8] { &k[..3] } let prefix_extractor = SliceTransform::create("first_three", first_three, None); let mut opts = Options::default(); opts.create_if_missing(true); opts.set_prefix_extractor(prefix_extractor); let db = DB::open(&opts, &db_path).unwrap(); assert!(db.put(A1, A1).is_ok()); assert!(db.put(A2, A2).is_ok()); assert!(db.put(B1, B1).is_ok()); assert!(db.put(B2, B2).is_ok()); assert_iter(db.prefix_iterator(b"aaa"), &[pair(A1, A1), pair(A2, A2)]); assert_iter(db.prefix_iterator(b"bbb"), &[pair(B1, B1), pair(B2, B2)]); } } #[test] fn test_no_in_domain() { fn extract_suffix(slice: &[u8]) -> &[u8] { if slice.len() > 4 { &slice[slice.len() - 4..slice.len()] } else { slice } } let db_path = DBPath::new("_rust_rocksdb_prefix_test"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.set_prefix_extractor(SliceTransform::create( "test slice transform", extract_suffix, None, )); opts.set_memtable_prefix_bloom_ratio(0.1); let db = DB::open(&opts, &db_path).unwrap(); db.put(b"key_sfx1", b"a").unwrap(); db.put(b"key_sfx2", b"b").unwrap(); assert_eq!(db.get(b"key_sfx1").unwrap().unwrap(), b"a"); } } rocksdb-0.23.0/tests/test_sst_file_writer.rs000064400000000000000000000104511046102023000173110ustar 00000000000000// Copyright 2020 Lucjan Suski // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. mod util; use pretty_assertions::assert_eq; use rocksdb::{Error, Options, ReadOptions, SstFileWriter, DB}; use util::{DBPath, U64Comparator, U64Timestamp}; #[test] fn sst_file_writer_works() { let db_path = DBPath::new("_rust_rocksdb_sstfilewritertest"); let dir = tempfile::Builder::new() .prefix("_rust_rocksdb_sstfilewritertest") .tempdir() .expect("Failed to create temporary path for file writer."); let writer_path = dir.path().join("filewriter"); { let opts = Options::default(); let mut writer = SstFileWriter::create(&opts); writer.open(&writer_path).unwrap(); writer.put(b"k1", b"v1").unwrap(); writer.put(b"k2", b"v2").unwrap(); writer.delete(b"k3").unwrap(); writer.finish().unwrap(); assert!(writer.file_size() > 0); } { let db = DB::open_default(&db_path).unwrap(); db.put(b"k3", b"v3").unwrap(); db.ingest_external_file(vec![&writer_path]).unwrap(); let r: Result>, Error> = db.get(b"k1"); assert_eq!(r.unwrap().unwrap(), b"v1"); let r: Result>, Error> = db.get(b"k2"); assert_eq!(r.unwrap().unwrap(), b"v2"); assert!(db.get(b"k3").unwrap().is_none()); } } #[test] fn sst_file_writer_with_ts_works() { let db_path = DBPath::new("_rust_rocksdb_sstfilewritertest_with_ts"); let dir = tempfile::Builder::new() .prefix("_rust_rocksdb_sstfilewritertest_with_ts") .tempdir() .expect("Failed to create temporary path for file writer."); let writer_path = dir.path().join("filewriter"); let ts = U64Timestamp::new(1); let ts2 = U64Timestamp::new(2); let ts3 = U64Timestamp::new(3); { let mut opts = Options::default(); opts.set_comparator_with_ts( U64Comparator::NAME, U64Timestamp::SIZE, Box::new(U64Comparator::compare), Box::new(U64Comparator::compare_ts), Box::new(U64Comparator::compare_without_ts), ); let mut writer = SstFileWriter::create(&opts); writer.open(&writer_path).unwrap(); writer.put_with_ts(b"k1", ts, b"v1").unwrap(); writer.put_with_ts(b"k2", ts2, b"v2").unwrap(); writer.put_with_ts(b"k3", ts2, b"v3").unwrap(); writer.finish().unwrap(); assert!(writer.file_size() > 0); } { let _ = DB::destroy(&Options::default(), &db_path); let mut db_opts = Options::default(); db_opts.create_missing_column_families(true); db_opts.create_if_missing(true); db_opts.set_comparator_with_ts( U64Comparator::NAME, U64Timestamp::SIZE, Box::new(U64Comparator::compare), Box::new(U64Comparator::compare_ts), Box::new(U64Comparator::compare_without_ts), ); let db = DB::open(&db_opts, &db_path).unwrap(); db.ingest_external_file(vec![&writer_path]).unwrap(); db.delete_with_ts(b"k3", ts3).unwrap(); let mut opts = ReadOptions::default(); opts.set_timestamp(ts); let r: Result>, Error> = db.get_opt(b"k1", &opts); assert_eq!(r.unwrap().unwrap(), b"v1"); // at ts1 k2 should be invisible assert!(db.get_opt(b"k2", &opts).unwrap().is_none()); // at ts2 k2 and k3 should be visible opts.set_timestamp(ts2); let r: Result>, Error> = db.get_opt(b"k2", &opts); assert_eq!(r.unwrap().unwrap(), b"v2"); let r = db.get_opt(b"k3", &opts); assert_eq!(r.unwrap().unwrap(), b"v3"); // at ts3 the k3 should be deleted opts.set_timestamp(ts3); assert!(db.get_opt(b"k3", &opts).unwrap().is_none()); } } rocksdb-0.23.0/tests/test_transaction_db.rs000064400000000000000000000553501046102023000171060ustar 00000000000000// Copyright 2021 Yiyuan Liu // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // mod util; use pretty_assertions::assert_eq; use rocksdb::{ CuckooTableOptions, DBAccess, Direction, Error, ErrorKind, IteratorMode, Options, ReadOptions, SliceTransform, TransactionDB, TransactionDBOptions, TransactionOptions, WriteBatchWithTransaction, WriteOptions, DB, }; use util::DBPath; #[test] fn open_default() { let path = DBPath::new("_rust_rocksdb_transaction_db_open_default"); { let db: TransactionDB = TransactionDB::open_default(&path).unwrap(); assert!(db.put(b"k1", b"v1111").is_ok()); let r: Result>, Error> = db.get(b"k1"); assert_eq!(r.unwrap().unwrap(), b"v1111"); assert!(db.delete(b"k1").is_ok()); assert!(db.get(b"k1").unwrap().is_none()); } } #[test] fn open_cf() { let path = DBPath::new("_rust_rocksdb_transaction_db_open_cf"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let db: TransactionDB = TransactionDB::open_cf( &opts, &TransactionDBOptions::default(), &path, ["cf1", "cf2"], ) .unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); let cf2 = db.cf_handle("cf2").unwrap(); db.put(b"k0", b"v0").unwrap(); db.put_cf(&cf1, b"k1", b"v1").unwrap(); db.put_cf(&cf2, b"k2", b"v2").unwrap(); assert_eq!(db.get(b"k0").unwrap().unwrap(), b"v0"); assert!(db.get(b"k1").unwrap().is_none()); assert!(db.get(b"k2").unwrap().is_none()); assert!(db.get_cf(&cf1, b"k0").unwrap().is_none()); assert_eq!(db.get_cf(&cf1, b"k1").unwrap().unwrap(), b"v1"); assert!(db.get_cf(&cf1, b"k2").unwrap().is_none()); assert!(db.get_cf(&cf2, b"k0").unwrap().is_none()); assert!(db.get_cf(&cf2, b"k1").unwrap().is_none()); assert_eq!(db.get_cf(&cf2, b"k2").unwrap().unwrap(), b"v2"); } } #[test] fn put_get() { let path = DBPath::new("_rust_rocksdb_transaction_db_put_get"); { let db: TransactionDB = TransactionDB::open_default(&path).unwrap(); assert!(db.put(b"k1", b"v1111").is_ok()); assert!(db.put(b"k2", b"v22222222").is_ok()); let v1 = db.get(b"k1").unwrap().unwrap(); let v2 = db.get(b"k2").unwrap().unwrap(); assert_eq!(v1.as_slice(), b"v1111"); assert_eq!(v2.as_slice(), b"v22222222"); } } #[test] fn multi_get() { let path = DBPath::new("_rust_rocksdb_multi_get"); { let db: TransactionDB = TransactionDB::open_default(&path).unwrap(); let initial_snap = db.snapshot(); db.put(b"k1", b"v1").unwrap(); let k1_snap = db.snapshot(); db.put(b"k2", b"v2").unwrap(); let _ = db.multi_get([b"k0"; 40]); let assert_values = |values: Vec<_>| { assert_eq!(3, values.len()); assert_eq!(values[0], None); assert_eq!(values[1], Some(b"v1".to_vec())); assert_eq!(values[2], Some(b"v2".to_vec())); }; let values = db .multi_get([b"k0", b"k1", b"k2"]) .into_iter() .map(Result::unwrap) .collect::>(); assert_values(values); let values = DBAccess::multi_get_opt(&db, [b"k0", b"k1", b"k2"], &Default::default()) .into_iter() .map(Result::unwrap) .collect::>(); assert_values(values); let values = db .snapshot() .multi_get([b"k0", b"k1", b"k2"]) .into_iter() .map(Result::unwrap) .collect::>(); assert_values(values); let none_values = initial_snap .multi_get([b"k0", b"k1", b"k2"]) .into_iter() .map(Result::unwrap) .collect::>(); assert_eq!(none_values, vec![None; 3]); let k1_only = k1_snap .multi_get([b"k0", b"k1", b"k2"]) .into_iter() .map(Result::unwrap) .collect::>(); assert_eq!(k1_only, vec![None, Some(b"v1".to_vec()), None]); let txn = db.transaction(); let values = txn .multi_get([b"k0", b"k1", b"k2"]) .into_iter() .map(Result::unwrap) .collect::>(); assert_values(values); } } #[test] fn multi_get_cf() { let path = DBPath::new("_rust_rocksdb_multi_get_cf"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let db: TransactionDB = TransactionDB::open_cf( &opts, &TransactionDBOptions::default(), &path, ["cf0", "cf1", "cf2"], ) .unwrap(); let cf0 = db.cf_handle("cf0").unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); db.put_cf(&cf1, b"k1", b"v1").unwrap(); let cf2 = db.cf_handle("cf2").unwrap(); db.put_cf(&cf2, b"k2", b"v2").unwrap(); let values = db .multi_get_cf(vec![(&cf0, b"k0"), (&cf1, b"k1"), (&cf2, b"k2")]) .into_iter() .map(Result::unwrap) .collect::>(); assert_eq!(3, values.len()); assert_eq!(values[0], None); assert_eq!(values[1], Some(b"v1".to_vec())); assert_eq!(values[2], Some(b"v2".to_vec())); let txn = db.transaction(); let values = txn .multi_get_cf(vec![(&cf0, b"k0"), (&cf1, b"k1"), (&cf2, b"k2")]) .into_iter() .map(Result::unwrap) .collect::>(); assert_eq!(3, values.len()); assert_eq!(values[0], None); assert_eq!(values[1], Some(b"v1".to_vec())); assert_eq!(values[2], Some(b"v2".to_vec())); } } #[test] fn destroy_on_open() { let path = DBPath::new("_rust_rocksdb_transaction_db_destroy_on_open"); let _db: TransactionDB = TransactionDB::open_default(&path).unwrap(); let opts = Options::default(); // The TransactionDB will still be open when we try to destroy it and the lock should fail. match DB::destroy(&opts, &path) { Err(s) => { let message = s.to_string(); assert_eq!(s.kind(), ErrorKind::IOError); assert!(message.contains("_rust_rocksdb_transaction_db_destroy_on_open")); assert!(message.contains("/LOCK:")); } Ok(_) => panic!("should fail"), } } #[test] fn writebatch() { let path = DBPath::new("_rust_rocksdb_transaction_db_writebatch"); { let db: TransactionDB = TransactionDB::open_default(&path).unwrap(); { // test put let mut batch = WriteBatchWithTransaction::::default(); assert!(db.get(b"k1").unwrap().is_none()); assert_eq!(batch.len(), 0); assert!(batch.is_empty()); batch.put(b"k1", b"v1111"); batch.put(b"k2", b"v2222"); batch.put(b"k3", b"v3333"); assert_eq!(batch.len(), 3); assert!(!batch.is_empty()); assert!(db.get(b"k1").unwrap().is_none()); let p = db.write(batch); assert!(p.is_ok()); let r: Result>, Error> = db.get(b"k1"); assert_eq!(r.unwrap().unwrap(), b"v1111"); } { // test delete let mut batch = WriteBatchWithTransaction::::default(); batch.delete(b"k1"); assert_eq!(batch.len(), 1); assert!(!batch.is_empty()); let p = db.write(batch); assert!(p.is_ok()); assert!(db.get(b"k1").unwrap().is_none()); } { // test size_in_bytes let mut batch = WriteBatchWithTransaction::::default(); let before = batch.size_in_bytes(); batch.put(b"k1", b"v1234567890"); let after = batch.size_in_bytes(); assert!(before + 10 <= after); } } } #[test] fn iterator_test() { let path = DBPath::new("_rust_rocksdb_transaction_db_iteratortest"); { let db: TransactionDB = TransactionDB::open_default(&path).unwrap(); let k1: Box<[u8]> = b"k1".to_vec().into_boxed_slice(); let k2: Box<[u8]> = b"k2".to_vec().into_boxed_slice(); let k3: Box<[u8]> = b"k3".to_vec().into_boxed_slice(); let k4: Box<[u8]> = b"k4".to_vec().into_boxed_slice(); let v1: Box<[u8]> = b"v1111".to_vec().into_boxed_slice(); let v2: Box<[u8]> = b"v2222".to_vec().into_boxed_slice(); let v3: Box<[u8]> = b"v3333".to_vec().into_boxed_slice(); let v4: Box<[u8]> = b"v4444".to_vec().into_boxed_slice(); db.put(&*k1, &*v1).unwrap(); db.put(&*k2, &*v2).unwrap(); db.put(&*k3, &*v3).unwrap(); let expected = vec![ (k1.clone(), v1.clone()), (k2.clone(), v2.clone()), (k3.clone(), v3.clone()), ]; let iter = db.iterator(IteratorMode::Start); assert_eq!(iter.map(Result::unwrap).collect::>(), expected); // Test that it's idempotent let iter = db.iterator(IteratorMode::Start); assert_eq!(iter.map(Result::unwrap).collect::>(), expected); let iter = db.iterator(IteratorMode::Start); assert_eq!(iter.map(Result::unwrap).collect::>(), expected); // Test in reverse let iter = db.iterator(IteratorMode::End); let mut tmp_vec = iter.map(Result::unwrap).collect::>(); tmp_vec.reverse(); let old_iter = db.iterator(IteratorMode::Start); db.put(&*k4, &*v4).unwrap(); let expected2 = vec![ (k1, v1), (k2, v2), (k3.clone(), v3.clone()), (k4.clone(), v4.clone()), ]; assert_eq!(old_iter.map(Result::unwrap).collect::>(), expected); let iter = db.iterator(IteratorMode::Start); assert_eq!(iter.map(Result::unwrap).collect::>(), expected2); let iter = db.iterator(IteratorMode::From(b"k3", Direction::Forward)); assert_eq!( iter.map(Result::unwrap).collect::>(), vec![(k3, v3), (k4, v4)] ); } } #[test] fn snapshot_test() { let path = DBPath::new("_rust_rocksdb_transaction_db_snapshottest"); { let db: TransactionDB = TransactionDB::open_default(&path).unwrap(); assert!(db.put(b"k1", b"v1111").is_ok()); let snap = db.snapshot(); assert_eq!(snap.get(b"k1").unwrap().unwrap(), b"v1111"); assert!(db.put(b"k2", b"v2222").is_ok()); assert!(db.get(b"k2").unwrap().is_some()); assert!(snap.get(b"k2").unwrap().is_none()); } } #[test] fn prefix_extract_and_iterate_test() { let path = DBPath::new("_rust_rocksdb_transaction_db_prefix_extract_and_iterate"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); opts.set_prefix_extractor(SliceTransform::create_fixed_prefix(2)); let txn_db_opts = TransactionDBOptions::default(); let db: TransactionDB = TransactionDB::open(&opts, &txn_db_opts, &path).unwrap(); db.put(b"p1_k1", b"v1").unwrap(); db.put(b"p2_k2", b"v2").unwrap(); db.put(b"p1_k3", b"v3").unwrap(); db.put(b"p1_k4", b"v4").unwrap(); db.put(b"p2_k5", b"v5").unwrap(); let mut readopts = ReadOptions::default(); readopts.set_prefix_same_as_start(true); readopts.set_iterate_lower_bound(b"p1".to_vec()); readopts.set_pin_data(true); let iter = db.iterator_opt(IteratorMode::Start, readopts); let expected: Vec<_> = vec![(b"p1_k1", b"v1"), (b"p1_k3", b"v3"), (b"p1_k4", b"v4")] .into_iter() .map(|(k, v)| (k.to_vec().into_boxed_slice(), v.to_vec().into_boxed_slice())) .collect(); assert_eq!(expected, iter.map(Result::unwrap).collect::>()); } } #[test] fn cuckoo() { let path = DBPath::new("_rust_rocksdb_transaction_db_cuckoo"); { let mut opts = Options::default(); let txn_db_opts = TransactionDBOptions::default(); let mut factory_opts = CuckooTableOptions::default(); factory_opts.set_hash_ratio(0.8); factory_opts.set_max_search_depth(20); factory_opts.set_cuckoo_block_size(10); factory_opts.set_identity_as_first_hash(true); factory_opts.set_use_module_hash(false); opts.set_cuckoo_table_factory(&factory_opts); opts.create_if_missing(true); let db: TransactionDB = TransactionDB::open(&opts, &txn_db_opts, &path).unwrap(); db.put(b"k1", b"v1").unwrap(); db.put(b"k2", b"v2").unwrap(); let r: Result>, Error> = db.get(b"k1"); assert_eq!(r.unwrap().unwrap(), b"v1"); let r: Result>, Error> = db.get(b"k2"); assert_eq!(r.unwrap().unwrap(), b"v2"); assert!(db.delete(b"k1").is_ok()); assert!(db.get(b"k1").unwrap().is_none()); } } #[test] fn transaction() { let path = DBPath::new("_rust_rocksdb_transaction_db_transaction"); { let mut opts = Options::default(); opts.create_if_missing(true); let mut txn_db_opts = TransactionDBOptions::default(); txn_db_opts.set_txn_lock_timeout(10); let db: TransactionDB = TransactionDB::open(&opts, &txn_db_opts, &path).unwrap(); // put outside of transaction db.put(b"k1", b"v1").unwrap(); assert_eq!(db.get(b"k1").unwrap().unwrap(), b"v1"); let txn1 = db.transaction(); txn1.put(b"k1", b"v2").unwrap(); // get outside of transaction assert_eq!(db.get(b"k1").unwrap().unwrap().as_slice(), b"v1"); // modify same key in another transaction, should get TimedOut let txn2 = db.transaction(); let err = txn2.put(b"k1", b"v3").unwrap_err(); assert_eq!(err.kind(), ErrorKind::TimedOut); // modify same key directly, should also get TimedOut let err = db.put(b"k1", b"v4").unwrap_err(); assert_eq!(err.kind(), ErrorKind::TimedOut); txn1.commit().unwrap(); assert_eq!(db.get(b"k1").unwrap().unwrap().as_slice(), b"v2"); } } #[test] fn transaction_iterator() { let path = DBPath::new("_rust_rocksdb_transaction_db_transaction_iterator"); { let db: TransactionDB = TransactionDB::open_default(&path).unwrap(); let k1: Box<[u8]> = b"k1".to_vec().into_boxed_slice(); let k2: Box<[u8]> = b"k2".to_vec().into_boxed_slice(); let k3: Box<[u8]> = b"k3".to_vec().into_boxed_slice(); let k4: Box<[u8]> = b"k4".to_vec().into_boxed_slice(); let v1: Box<[u8]> = b"v1111".to_vec().into_boxed_slice(); let v2: Box<[u8]> = b"v2222".to_vec().into_boxed_slice(); let v3: Box<[u8]> = b"v3333".to_vec().into_boxed_slice(); let v4: Box<[u8]> = b"v4444".to_vec().into_boxed_slice(); db.put(&*k1, &*v1).unwrap(); db.put(&*k2, &*v2).unwrap(); db.put(&*k3, &*v3).unwrap(); let expected = vec![ (k1.clone(), v1.clone()), (k2.clone(), v2.clone()), (k3.clone(), v3.clone()), ]; let txn = db.transaction(); let iter = txn.iterator(IteratorMode::Start); assert_eq!(iter.map(Result::unwrap).collect::>(), expected); // Test that it's idempotent let iter = txn.iterator(IteratorMode::Start); assert_eq!(iter.map(Result::unwrap).collect::>(), expected); let iter = txn.iterator(IteratorMode::Start); assert_eq!(iter.map(Result::unwrap).collect::>(), expected); // Test in reverse let iter = txn.iterator(IteratorMode::End); let mut tmp_vec = iter.map(Result::unwrap).collect::>(); tmp_vec.reverse(); let old_iter = txn.iterator(IteratorMode::Start); txn.put(&*k4, &*v4).unwrap(); let expected2 = vec![ (k1, v1), (k2, v2), (k3.clone(), v3.clone()), (k4.clone(), v4.clone()), ]; assert_eq!(old_iter.map(Result::unwrap).collect::>(), expected); let iter = txn.iterator(IteratorMode::Start); assert_eq!(iter.map(Result::unwrap).collect::>(), expected2); let iter = txn.iterator(IteratorMode::From(b"k3", Direction::Forward)); assert_eq!( iter.map(Result::unwrap).collect::>(), vec![(k3, v3), (k4, v4)] ); } } #[test] fn transaction_rollback() { let path = DBPath::new("_rust_rocksdb_transaction_db_transaction_rollback"); { let db: TransactionDB = TransactionDB::open_default(&path).unwrap(); let txn = db.transaction(); txn.rollback().unwrap(); txn.put(b"k1", b"v1").unwrap(); txn.set_savepoint(); txn.put(b"k2", b"v2").unwrap(); assert_eq!(txn.get(b"k1").unwrap().unwrap(), b"v1"); assert_eq!(txn.get(b"k2").unwrap().unwrap(), b"v2"); txn.rollback_to_savepoint().unwrap(); assert_eq!(txn.get(b"k1").unwrap().unwrap(), b"v1"); assert!(txn.get(b"k2").unwrap().is_none()); txn.rollback().unwrap(); assert!(txn.get(b"k1").unwrap().is_none()); txn.commit().unwrap(); assert!(db.get(b"k2").unwrap().is_none()); } } #[test] fn transaction_cf() { let path = DBPath::new("_rust_rocksdb_transaction_db_transaction_cf"); { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); let db: TransactionDB = TransactionDB::open_cf( &opts, &TransactionDBOptions::default(), &path, ["cf1", "cf2"], ) .unwrap(); let cf1 = db.cf_handle("cf1").unwrap(); let cf2 = db.cf_handle("cf2").unwrap(); let txn = db.transaction(); txn.put(b"k0", b"v0").unwrap(); txn.put_cf(&cf1, b"k1", b"v1").unwrap(); txn.put_cf(&cf2, b"k2", b"v2").unwrap(); assert_eq!(txn.get(b"k0").unwrap().unwrap(), b"v0"); assert!(txn.get(b"k1").unwrap().is_none()); assert!(txn.get(b"k2").unwrap().is_none()); assert!(txn.get_cf(&cf1, b"k0").unwrap().is_none()); assert_eq!(txn.get_cf(&cf1, b"k1").unwrap().unwrap(), b"v1"); assert!(txn.get_cf(&cf1, b"k2").unwrap().is_none()); assert!(txn.get_cf(&cf2, b"k0").unwrap().is_none()); assert!(txn.get_cf(&cf2, b"k1").unwrap().is_none()); assert_eq!(txn.get_cf(&cf2, b"k2").unwrap().unwrap(), b"v2"); txn.commit().unwrap(); } } #[test] fn transaction_snapshot() { let path = DBPath::new("_rust_rocksdb_transaction_db_transaction_snapshot"); { let db: TransactionDB = TransactionDB::open_default(&path).unwrap(); let txn = db.transaction(); let snapshot = txn.snapshot(); assert!(snapshot.get(b"k1").unwrap().is_none()); db.put(b"k1", b"v1").unwrap(); assert_eq!(snapshot.get(b"k1").unwrap().unwrap(), b"v1"); let mut opts = TransactionOptions::default(); opts.set_snapshot(true); let txn = db.transaction_opt(&WriteOptions::default(), &opts); db.put(b"k2", b"v2").unwrap(); let snapshot = txn.snapshot(); assert!(snapshot.get(b"k2").unwrap().is_none()); assert_eq!(txn.get(b"k2").unwrap().unwrap(), b"v2"); assert_eq!( txn.get_for_update(b"k2", true).unwrap_err().kind(), ErrorKind::Busy ); } } #[test] fn two_phase_commit() { let path = DBPath::new("_rust_rocksdb_transaction_db_2pc"); { let db: TransactionDB = TransactionDB::open_default(&path).unwrap(); let txn = db.transaction(); txn.put(b"k1", b"v1").unwrap(); txn.set_name(b"txn1").unwrap(); txn.prepare().unwrap(); txn.commit().unwrap(); let txn = db.transaction(); txn.put(b"k2", b"v2").unwrap(); let err = txn.prepare().unwrap_err(); assert_eq!(err.kind(), ErrorKind::InvalidArgument); let mut opt = TransactionOptions::new(); opt.set_skip_prepare(false); let txn = db.transaction_opt(&WriteOptions::default(), &opt); txn.put(b"k3", b"v3").unwrap(); let err = txn.prepare().unwrap_err(); assert_eq!(err.kind(), ErrorKind::InvalidArgument); } DB::destroy(&Options::default(), &path).unwrap(); { let db: TransactionDB = TransactionDB::open_default(&path).unwrap(); let txn = db.transaction(); txn.put(b"k1", b"v1").unwrap(); txn.set_name(b"t1").unwrap(); txn.prepare().unwrap(); let txn2 = db.transaction(); txn2.put(b"k2", b"v1").unwrap(); txn2.set_name(b"t2").unwrap(); txn2.prepare().unwrap(); let txn3 = db.transaction(); let err = txn3.set_name(b"t1").unwrap_err(); assert_eq!(err.kind(), ErrorKind::InvalidArgument); // k1 and k2 should locked after we restore prepared transactions. let err = db.put(b"k1", b"v2").unwrap_err(); assert_eq!(err.kind(), ErrorKind::TimedOut); } { // recovery let mut opt = TransactionDBOptions::new(); opt.set_default_lock_timeout(1); let db: TransactionDB = TransactionDB::open_default(&path).unwrap(); // get prepared transactions let txns = db.prepared_transactions(); assert_eq!(txns.len(), 2); for txn in txns.into_iter() { let name = txn.get_name().unwrap(); if name == b"t1" { txn.commit().unwrap(); } else if name == b"t2" { txn.rollback().unwrap(); } else { unreachable!(); } } assert_eq!(db.get(b"k1").unwrap().unwrap(), b"v1"); assert!(db.get(b"k2").unwrap().is_none()); } } #[test] fn test_snapshot_outlive_transaction_db() { let t = trybuild::TestCases::new(); t.compile_fail("tests/fail/snapshot_outlive_transaction_db.rs"); } #[test] fn test_txn_outlive_transaction_db() { let t = trybuild::TestCases::new(); t.compile_fail("tests/fail/transaction_outlive_transaction_db.rs"); } #[test] fn test_snapshot_outlive_txn() { let t = trybuild::TestCases::new(); t.compile_fail("tests/fail/snapshot_outlive_transaction.rs"); } rocksdb-0.23.0/tests/test_transaction_db_memory_usage.rs000064400000000000000000000050051046102023000216520ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. mod util; use pretty_assertions::assert_eq; use rocksdb::{perf, Options, TransactionDB, TransactionDBOptions}; use util::DBPath; #[cfg(not(feature = "multi-threaded-cf"))] type DefaultThreadMode = rocksdb::SingleThreaded; #[cfg(feature = "multi-threaded-cf")] type DefaultThreadMode = rocksdb::MultiThreaded; #[test] fn test_transaction_db_memory_usage() { let path = DBPath::new("_rust_rocksdb_transaction_db_memory_usage_test"); { let mut options = Options::default(); options.create_if_missing(true); options.enable_statistics(); // setup cache: let cache = rocksdb::Cache::new_lru_cache(1 << 20); // 1 MB cache let mut block_based_options = rocksdb::BlockBasedOptions::default(); block_based_options.set_block_cache(&cache); options.set_block_based_table_factory(&block_based_options); let tx_db_options = TransactionDBOptions::default(); let db: TransactionDB = TransactionDB::open(&options, &tx_db_options, &path).unwrap(); let mut builder = perf::MemoryUsageBuilder::new().unwrap(); builder.add_tx_db(&db); builder.add_cache(&cache); let memory_usage = builder.build().unwrap(); for i in 1..=1000 { let key = format!("key{}", i); let value = format!("value{}", i); db.put(&key, &value).unwrap(); } for i in 1..=1000 { let key = format!("key{}", i); let result = db.get(&key).unwrap().unwrap(); let result_str = String::from_utf8(result).unwrap(); assert_eq!(result_str, format!("value{}", i)); } assert_ne!(memory_usage.approximate_mem_table_total(), 0); assert_eq!(memory_usage.approximate_mem_table_readers_total(), 0); // Equals zero! assert_ne!(memory_usage.approximate_cache_total(), 0); assert_ne!(memory_usage.approximate_mem_table_unflushed(), 0); } } rocksdb-0.23.0/tests/test_transaction_db_property.rs000064400000000000000000000042131046102023000210420ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. mod util; use pretty_assertions::assert_eq; use rocksdb::{properties, Options, TransactionDB, TransactionDBOptions}; use util::DBPath; #[test] fn transaction_db_property_test() { let path = DBPath::new("_rust_rocksdb_transaction_db_property_test"); { let mut options = Options::default(); options.create_if_missing(true); options.enable_statistics(); let tx_db_options = TransactionDBOptions::default(); let db = TransactionDB::open(&options, &tx_db_options, &path).unwrap(); db.put("key1", "value1").unwrap(); db.put("key2", "value2").unwrap(); db.put("key3", "value3").unwrap(); let prop_name: &std::ffi::CStr = properties::STATS; let value = db.property_value(prop_name).unwrap().unwrap(); assert!(value.contains("Compaction Stats")); assert!(value.contains("Cumulative writes: 3 writes")); } } #[test] fn transaction_db_int_property_test() { let path = DBPath::new("_rust_rocksdb_transaction_db_int_property_test"); { let mut options = Options::default(); options.create_if_missing(true); options.enable_statistics(); let tx_db_options = TransactionDBOptions::default(); let db = TransactionDB::open(&options, &tx_db_options, &path).unwrap(); db.put("key1", "value1").unwrap(); db.put("key2", "value2").unwrap(); let prop_name: properties::PropertyName = properties::ESTIMATE_NUM_KEYS.to_owned(); let value = db.property_int_value(&prop_name).unwrap().unwrap(); assert_eq!(value, 2); } } rocksdb-0.23.0/tests/test_write_batch.rs000064400000000000000000000036461046102023000164100ustar 00000000000000// Copyright 2020 Tyler Neely // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. use std::collections::HashMap; use pretty_assertions::assert_eq; use rocksdb::{WriteBatch, WriteBatchIterator}; #[test] fn test_write_batch_clear() { let mut batch = WriteBatch::default(); batch.put(b"1", b"2"); assert_eq!(batch.len(), 1); batch.clear(); assert_eq!(batch.len(), 0); assert!(batch.is_empty()); } #[test] fn test_write_batch_with_serialized_data() { struct Iterator { data: HashMap, Vec>, } impl WriteBatchIterator for Iterator { fn put(&mut self, key: Box<[u8]>, value: Box<[u8]>) { match self.data.remove(key.as_ref()) { Some(expect) => { assert_eq!(value.as_ref(), expect.as_slice()); } None => { panic!("key not exists"); } } } fn delete(&mut self, _: Box<[u8]>) { panic!("invalid delete operation"); } } let mut kvs: HashMap, Vec> = HashMap::default(); kvs.insert(vec![1], vec![2]); kvs.insert(vec![2], vec![3]); kvs.insert(vec![1, 2, 3, 4, 5], vec![4]); let mut b1 = WriteBatch::default(); for (k, v) in &kvs { b1.put(k, v); } let data = b1.data(); let b2 = WriteBatch::from_data(data); let mut it = Iterator { data: kvs }; b2.iterate(&mut it); } rocksdb-0.23.0/tests/util/mod.rs000064400000000000000000000117151046102023000146060ustar 00000000000000#![allow(dead_code)] use std::{ cmp::Ordering, convert::TryInto, path::{Path, PathBuf}, }; use rocksdb::{Error, Options, DB}; /// Temporary database path which calls DB::Destroy when DBPath is dropped. pub struct DBPath { dir: tempfile::TempDir, // kept for cleaning up during drop path: PathBuf, } impl DBPath { /// Produces a fresh (non-existent) temporary path which will be DB::destroy'ed automatically. pub fn new(prefix: &str) -> DBPath { let dir = tempfile::Builder::new() .prefix(prefix) .tempdir() .expect("Failed to create temporary path for db."); let path = dir.path().join("db"); DBPath { dir, path } } } impl Drop for DBPath { fn drop(&mut self) { let opts = Options::default(); DB::destroy(&opts, &self.path).expect("Failed to destroy temporary DB"); } } /// Convert a DBPath ref to a Path ref. /// We don't implement this for DBPath values because we want them to /// exist until the end of their scope, not get passed into functions and /// dropped early. impl AsRef for &DBPath { fn as_ref(&self) -> &Path { &self.path } } type Pair = (Box<[u8]>, Box<[u8]>); pub fn pair(left: &[u8], right: &[u8]) -> Pair { (Box::from(left), Box::from(right)) } #[track_caller] pub fn assert_iter(iter: impl Iterator>, want: &[Pair]) { let got = iter.collect::, _>>().unwrap(); assert_eq!(got.as_slice(), want); } #[track_caller] pub fn assert_iter_reversed(iter: impl Iterator>, want: &[Pair]) { let mut got = iter.collect::, _>>().unwrap(); got.reverse(); assert_eq!(got.as_slice(), want); } /// A timestamp type we use in testing [user-defined timestamp](https://github.com/facebook/rocksdb/wiki/User-defined-Timestamp). /// This is a `u64` in little endian encoding. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct U64Timestamp([u8; Self::SIZE]); impl U64Timestamp { pub const SIZE: usize = 8; pub fn new(ts: u64) -> Self { Self(ts.to_le_bytes()) } } impl From<&[u8]> for U64Timestamp { fn from(slice: &[u8]) -> Self { assert_eq!( slice.len(), Self::SIZE, "incorrect timestamp length: {}, should be {}", slice.len(), Self::SIZE ); Self(slice.try_into().unwrap()) } } impl From for Vec { fn from(ts: U64Timestamp) -> Self { ts.0.to_vec() } } impl AsRef<[u8]> for U64Timestamp { fn as_ref(&self) -> &[u8] { &self.0 } } impl PartialOrd for U64Timestamp { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for U64Timestamp { fn cmp(&self, other: &Self) -> Ordering { let lhs = u64::from_le_bytes(self.0); let rhs = u64::from_le_bytes(other.0); lhs.cmp(&rhs) } } /// A comparator for use in column families with [user-defined timestamp](https://github.com/facebook/rocksdb/wiki/User-defined-Timestamp) /// enabled. This comparator assumes `u64` timestamp in little endian encoding. /// This is the same behavior as RocksDB's built-in comparator. /// /// Adapted from C++ and Golang implementations from: /// - [rocksdb](https://github.com/facebook/rocksdb/blob/v9.4.0/test_util/testutil.cc#L112) /// - [gorocksdb](https://github.com/linxGnu/grocksdb/blob/v1.9.2/db_ts_test.go#L167) /// - [SeiDB](https://github.com/sei-protocol/sei-db/blob/v0.0.41/ss/rocksdb/comparator.go) pub struct U64Comparator; impl U64Comparator { pub const NAME: &'static str = "rust-rocksdb.U64Comparator"; pub fn compare(a: &[u8], b: &[u8]) -> Ordering { // First, compare the keys without timestamps. If the keys are different, // then we don't have to consider the timestamps at all. let ord = Self::compare_without_ts(a, true, b, true); if ord != Ordering::Equal { return ord; } // The keys are the same, so now we compare the timestamps. // The larger (i.e. newer) key should come first, hence the `reverse`. Self::compare_ts( extract_timestamp_from_user_key(a), extract_timestamp_from_user_key(b), ) .reverse() } pub fn compare_ts(bz1: &[u8], bz2: &[u8]) -> Ordering { let ts1 = U64Timestamp::from(bz1); let ts2 = U64Timestamp::from(bz2); ts1.cmp(&ts2) } pub fn compare_without_ts( mut a: &[u8], a_has_ts: bool, mut b: &[u8], b_has_ts: bool, ) -> Ordering { if a_has_ts { a = strip_timestamp_from_user_key(a); } if b_has_ts { b = strip_timestamp_from_user_key(b); } a.cmp(b) } } fn extract_timestamp_from_user_key(key: &[u8]) -> &[u8] { &key[(key.len() - U64Timestamp::SIZE)..] } fn strip_timestamp_from_user_key(key: &[u8]) -> &[u8] { &key[..(key.len() - U64Timestamp::SIZE)] }