patricia_tree-0.9.0/.cargo_vcs_info.json0000644000000001360000000000100136550ustar { "git": { "sha1": "1ed258dcc5244bbc559ef24cfe4c70bb3a78597d" }, "path_in_vcs": "" }patricia_tree-0.9.0/.github/workflows/ci.yml000064400000000000000000000017211046102023000171610ustar 00000000000000name: CI on: [push] jobs: check: name: Check runs-on: ubuntu-latest strategy: matrix: toolchain: [stable, beta, nightly] steps: - name: Checkout sources uses: actions/checkout@v4 - run: rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }} - run: cargo check --all-features --all test: name: Test Suite runs-on: ubuntu-latest strategy: matrix: toolchain: [stable, beta, nightly] steps: - name: Checkout sources uses: actions/checkout@v4 - run: rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }} - run: cargo test --all-features --all lints: name: Lints runs-on: ubuntu-latest strategy: matrix: toolchain: [stable, beta, nightly] steps: - name: Checkout sources uses: actions/checkout@v4 - run: rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }} - run: rustup component add rustfmt clippy - run: cargo fmt --all -- --check - run: cargo clippy --all-features --all -- -D warnings patricia_tree-0.9.0/.gitignore000064400000000000000000000000371046102023000144350ustar 00000000000000/target/ **/*.rs.bk Cargo.lock patricia_tree-0.9.0/Cargo.lock0000644000000244560000000000100116430ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
version = 4 [[package]] name = "anstream" version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" dependencies = [ "anstyle", "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", "is_terminal_polyfill", "utf8parse", ] [[package]] name = "anstyle" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "anstyle-parse" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ "windows-sys", ] [[package]] name = "anstyle-wincon" version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" dependencies = [ "anstyle", "windows-sys", ] [[package]] name = "bitflags" version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1be3f42a67d6d345ecd59f675f3f012d6974981560836e938c22b424b85ce1be" [[package]] name = "byteorder" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" version = "4.5.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8eb5e908ef3a6efbe1ed62520fb7287959888c88485abe072543190ecc66783" 
dependencies = [ "clap_builder", "clap_derive", ] [[package]] name = "clap_builder" version = "4.5.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96b01801b5fc6a0a232407abc821660c9c6d25a1cafc0d4f85f29fb8d9afc121" dependencies = [ "anstream", "anstyle", "clap_lex", "strsim", ] [[package]] name = "clap_derive" version = "4.5.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54b755194d6389280185988721fffba69495eed5ee9feeee9a599b53db80318c" dependencies = [ "heck", "proc-macro2", "quote", "syn", ] [[package]] name = "clap_lex" version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "colorchoice" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] name = "getrandom" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "libc", "wasi", ] [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "is_terminal_polyfill" version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "itoa" version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" [[package]] name = "libc" version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "memchr" version = "2.7.4" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "patricia_tree" version = "0.9.0" dependencies = [ "bitflags", "clap", "rand", "serde", "serde_json", ] [[package]] name = "ppv-lite86" version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" dependencies = [ "zerocopy", ] [[package]] name = "proc-macro2" version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" dependencies = [ "proc-macro2", ] [[package]] name = "rand" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha", "rand_core", ] [[package]] name = "rand_chacha" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", "rand_core", ] [[package]] name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ "getrandom", ] [[package]] name = "ryu" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "serde" version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.135" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9" dependencies = [ "itoa", "memchr", "ryu", "serde", ] [[package]] name = "strsim" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" version = "2.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "unicode-ident" version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = "utf8parse" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "windows-sys" version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_gnullvm", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "zerocopy" version = "0.7.35" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ "byteorder", "zerocopy-derive", ] [[package]] name = "zerocopy-derive" version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", "syn", ] patricia_tree-0.9.0/Cargo.toml0000644000000026770000000000100116670ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.82.0" name = "patricia_tree" version = "0.9.0" authors = ["Takeru Ohta "] build = false autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "Memory-efficient data structures based on patricia tree" homepage = "https://github.com/sile/patricia_tree" readme = "README.md" keywords = [ "patricia", "radix", "trie", ] categories = ["data-structures"] license = "MIT" repository = "https://github.com/sile/patricia_tree" [package.metadata.docs.rs] all-features = true [lib] name = "patricia_tree" path = "src/lib.rs" [[example]] name = "insert_lines" path = "examples/insert_lines.rs" [[bench]] name = "bench" path = "benches/bench.rs" [dependencies.bitflags] version = "2" [dependencies.serde] version = "1" optional = true [dev-dependencies.clap] version = "4" features = ["derive"] [dev-dependencies.rand] version = "0.8" [dev-dependencies.serde_json] version = "1" 
patricia_tree-0.9.0/Cargo.toml.orig000064400000000000000000000013311046102023000153320ustar 00000000000000[package] name = "patricia_tree" version = "0.9.0" authors = ["Takeru Ohta "] description = "Memory-efficient data structures based on patricia tree" homepage = "https://github.com/sile/patricia_tree" repository = "https://github.com/sile/patricia_tree" readme = "README.md" keywords = ["patricia", "radix", "trie"] categories = ["data-structures"] license = "MIT" edition = "2021" rust-version = "1.82.0" [dependencies] bitflags = "2" serde = { version = "1", optional = true } [dev-dependencies] clap = { version = "4", features = ["derive"] } rand = "0.8" serde_json = { version = "1" } [package.metadata.docs.rs] all-features = true [[example]] name = "insert_lines" path = "examples/insert_lines.rs" patricia_tree-0.9.0/LICENSE000064400000000000000000000021051046102023000134500ustar 00000000000000The MIT License Copyright (c) 2017 Takeru Ohta Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
patricia_tree-0.9.0/README.md000064400000000000000000000062111046102023000137240ustar 00000000000000patricia_tree ============= [![patricia_tree](https://img.shields.io/crates/v/patricia_tree.svg)](https://crates.io/crates/patricia_tree) [![Documentation](https://docs.rs/patricia_tree/badge.svg)](https://docs.rs/patricia_tree) [![Actions Status](https://github.com/sile/patricia_tree/workflows/CI/badge.svg)](https://github.com/sile/patricia_tree/actions) [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) Memory-efficient data structures based on patricia tree (a.k.a. radix tree). [Documentation](https://docs.rs/patricia_tree) Common prefixes of the keys in a patricia tree are represented by a shared path. So if the prefixes of the key set are highly redundant, the memory usage of the resulting patricia tree will be drastically less than that of more generic data structures (e.g., `BTreeMap`). See [Radix tree](https://en.wikipedia.org/wiki/Radix_tree) for more details. 
Examples --------- ```rust use patricia_tree::PatriciaMap; let mut map = PatriciaMap::new(); map.insert("foo", 1); map.insert("bar", 2); map.insert("baz", 3); assert_eq!(map.len(), 3); assert_eq!(map.get("foo"), Some(&1)); assert_eq!(map.get("bar"), Some(&2)); assert_eq!(map.get("baz"), Some(&3)); ``` Benchmarks ----------- ```console $ cargo run --example insert_lines --release -- --version 2> /dev/null insert_lines 0.1.0 /// /// INPUT: Wikipedia /// $ curl -s https://dumps.wikimedia.org/enwiki/latest/enwiki-latest-all-titles-in-ns0.gz | gzip -d > enwiki-latest-all-titles-in-ns0 $ du -hs enwiki-latest-all-titles-in-ns0 271M enwiki-latest-all-titles-in-ns0 // HashSet $ /usr/bin/time -f "# ELAPSED: %E\n# MEMORY: %M" cargo run --example insert_lines --release -- --kind hash < enwiki-latest-all-titles-in-ns0 # LINES: 13450823 # ELAPSED: 0:10.23 # MEMORY: 1001548 // 978 MB // BTreeSet $ /usr/bin/time -f "# ELAPSED: %E\n# MEMORY: %M" cargo run --example insert_lines --release -- --kind btree < enwiki-latest-all-titles-in-ns0 # LINES: 13450823 # ELAPSED: 0:10.90 # MEMORY: 1112068 // 1,086 MB // PatriciaSet $ /usr/bin/time -f "# ELAPSED: %E\n# MEMORY: %M" cargo run --example insert_lines --release -- --kind patricia < enwiki-latest-all-titles-in-ns0 # LINES: 13450823 # ELAPSED: 1:12.55 # MEMORY: 434340 // 424 MB /// /// INPUT: Google 5-gram /// $ curl -s http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-5gram-20120701-0.gz | gzip -d > googlebooks-eng-all-5gram-20120701-0 $ du -hs googlebooks-eng-all-5gram-20120701-0 331M googlebooks-eng-all-5gram-20120701-0 // HashSet $ /usr/bin/time -f "# ELAPSED: %E\n# MEMORY: %M" cargo run --example insert_lines --release -- --kind hash < googlebooks-eng-all-5gram-20120701-0 # LINES: 9814743 # ELAPSED: 0:08.36 # MEMORY: 1115544 // 1,089 MB // BTreeSet $ /usr/bin/time -f "# ELAPSED: %E\n# MEMORY: %M" cargo run --example insert_lines --release -- --kind btree < googlebooks-eng-all-5gram-20120701-0 # LINES: 9814743 # 
ELAPSED: 0:06.85 # MEMORY: 942236 // 920 MB // PatriciaSet $ /usr/bin/time -f "# ELAPSED: %E\n# MEMORY: %M" cargo run --example insert_lines --release -- --kind patricia < googlebooks-eng-all-5gram-20120701-0 # LINES: 9814743 # ELAPSED: 0:25.62 # MEMORY: 223616 // 218 MB ``` patricia_tree-0.9.0/benches/bench.rs000064400000000000000000000020311046102023000154750ustar 00000000000000#![feature(test)] extern crate patricia_tree; extern crate rand; extern crate test; use patricia_tree::PatriciaSet; use rand::seq::SliceRandom; #[bench] fn bench_insertion(b: &mut test::Bencher) { let mut set = PatriciaSet::new(); b.iter(|| { set.insert(rand::random::().to_string()); }); } #[bench] fn bench_retrieval(b: &mut test::Bencher) { const MAX: usize = 1_000_000; let mut set = PatriciaSet::new(); for _ in 0..MAX / 2 { set.insert((rand::random::() % MAX).to_string()); } b.iter(|| { set.contains((rand::random::() % MAX).to_string()); }); } #[bench] fn bench_removal(b: &mut test::Bencher) { const MAX: usize = 5_000_000; let mut set = PatriciaSet::new(); for i in 0..MAX { set.insert(i.to_string()); } let mut values = (0..MAX).collect::>(); values.shuffle(&mut rand::thread_rng()); let mut values = values.iter().cycle(); b.iter(|| { set.remove(values.next().unwrap().to_string()); }); } patricia_tree-0.9.0/examples/insert_lines.rs000064400000000000000000000026001046102023000173250ustar 00000000000000use clap::Parser; use patricia_tree::PatriciaSet; use std::collections::{BTreeSet, HashSet}; use std::io::BufRead; #[derive(Parser)] struct Args { #[clap( long, default_value = "patricia", value_parser = clap::builder::PossibleValuesParser::new(["patricia", "hash", "btree", "count"]) )] kind: String, } fn main() { let args = Args::parse(); match args.kind.as_str() { "patricia" => { let mut set = PatriciaSet::new(); each_line(|line| { set.insert(line); }); println!("# LINES: {}", set.len()); } "hash" => { let mut set = HashSet::new(); each_line(|line| { set.insert(line); }); println!("# LINES: 
{}", set.len()); } "btree" => { let mut set = BTreeSet::new(); each_line(|line| { set.insert(line); }); println!("# LINES: {}", set.len()); } "count" => { let mut count = 0; each_line(|_| { count += 1; }); println!("# LINES: {}", count); } _ => unreachable!(), } } fn each_line(mut f: F) where F: FnMut(String), { let stdin = std::io::stdin(); for line in stdin.lock().lines() { f(line.unwrap()); } } patricia_tree-0.9.0/src/lib.rs000064400000000000000000000104771046102023000143610ustar 00000000000000//! Memory-efficient data structures based on patricia tree (a.k.a. radix tree). //! //! Common prefixes of the keys in a patricia tree are represented by a shared path. //! So if the prefixes of the key set are highly redundant, //! the memory usage of the resulting patricia tree will be drastically less than //! that of more generic data structures (e.g., `BTreeMap`). //! //! See [Radix tree](https://en.wikipedia.org/wiki/Radix_tree) for more details. //! //! # Examples //! //! ``` //! use patricia_tree::PatriciaMap; //! //! let mut map = PatriciaMap::new(); //! map.insert("foo", 1); //! map.insert("bar", 2); //! map.insert("baz", 3); //! assert_eq!(map.len(), 3); //! //! assert_eq!(map.get("foo"), Some(&1)); //! assert_eq!(map.get("bar"), Some(&2)); //! assert_eq!(map.get("baz"), Some(&3)); //! ``` #![warn(missing_docs)] #![allow(clippy::cast_ptr_alignment)] #[macro_use] extern crate bitflags; #[cfg(test)] extern crate rand; use std::cmp::Ordering; pub use map::{GenericPatriciaMap, PatriciaMap, StringPatriciaMap}; pub use set::{GenericPatriciaSet, PatriciaSet, StringPatriciaSet}; pub mod map; pub mod set; mod node; #[cfg(feature = "serde")] mod serialization; mod tree; /// This trait represents a bytes type that can be used as the key type of patricia trees. pub trait Bytes { /// Borrowed type of this type. 
type Borrowed: ?Sized + BorrowedBytes + ToOwned; } impl Bytes for Vec { type Borrowed = [u8]; } impl Bytes for String { type Borrowed = str; } /// Borrowed type of [`Bytes`]. pub trait BorrowedBytes { /// Returns the byte representation of this instance. fn as_bytes(&self) -> &[u8]; /// Returns `true` if the given bytes are a valid representation of this type, otherwise `false`. fn is_valid_bytes(bytes: &[u8]) -> bool; /// Converts the given bytes to an instance of this type. /// /// Implementations may assume that `is_valid_bytes(bytes)` is `true`; callers must uphold this. fn from_bytes(bytes: &[u8]) -> &Self; /// Returns a suffix of this instance not containing the common prefix with the given bytes. fn strip_common_prefix(&self, bytes: &[u8]) -> &Self; /// Same as [`Self::strip_common_prefix()`], but also returns the length of the common prefix. fn strip_common_prefix_and_len(&self, bytes: &[u8]) -> (&Self, usize) { let next = self.strip_common_prefix(bytes); let common_prefix_len = self.as_bytes().len() - next.as_bytes().len(); (next, common_prefix_len) } /// Compares the first item of this instance with the first item represented in the given bytes. fn cmp_first_item(&self, bytes: &[u8]) -> Ordering; /// Returns `true` if this instance is empty, otherwise `false`. fn is_empty(&self) -> bool { self.as_bytes().is_empty() } /// Returns a suffix of this instance not containing the first `n` bytes. fn strip_n_prefix(&self, n: usize) -> &Self; } impl BorrowedBytes for [u8] { fn as_bytes(&self) -> &[u8] { self } fn is_valid_bytes(_bytes: &[u8]) -> bool { true } fn from_bytes(bytes: &[u8]) -> &Self { bytes } fn strip_common_prefix(&self, bytes: &[u8]) -> &Self { let i = self .iter() .zip(bytes.iter()) .take_while(|(a, b)| a == b) .count(); &self[i..] } fn cmp_first_item(&self, bytes: &[u8]) -> Ordering { self.first().cmp(&bytes.first()) } fn strip_n_prefix(&self, n: usize) -> &Self { &self[n..] 
} } impl BorrowedBytes for str { fn as_bytes(&self) -> &[u8] { self.as_bytes() } fn is_valid_bytes(bytes: &[u8]) -> bool { std::str::from_utf8(bytes).is_ok() } fn from_bytes(bytes: &[u8]) -> &Self { std::str::from_utf8(bytes).expect("unreachable") } fn strip_common_prefix(&self, bytes: &[u8]) -> &Self { for (i, c) in self.char_indices() { let n = c.len_utf8(); if self.as_bytes()[i..i + n] .iter() .ne(bytes[i..].iter().take(n)) { return &self[i..]; } } "" } fn cmp_first_item(&self, bytes: &[u8]) -> Ordering { self.chars() .next() .cmp(&Self::from_bytes(bytes).chars().next()) } fn strip_n_prefix(&self, n: usize) -> &Self { &self[n..] } } patricia_tree-0.9.0/src/map.rs000064400000000000000000000732511046102023000143670ustar 00000000000000//! A map based on a patricia tree. use crate::node; #[cfg(any(test, feature = "serde"))] use crate::node::Node; use crate::tree::{self, PatriciaTree}; use crate::{BorrowedBytes, Bytes}; use std::fmt; use std::iter::FromIterator; use std::marker::PhantomData; /// Patricia tree based map with [`Vec`] as key. pub type PatriciaMap = GenericPatriciaMap, V>; /// Patricia tree based map with [`String`] as key. pub type StringPatriciaMap = GenericPatriciaMap; /// Patricia tree based map. pub struct GenericPatriciaMap { tree: PatriciaTree, _key: PhantomData, } impl GenericPatriciaMap { /// Makes a new empty `PatriciaMap` instance. /// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let mut map = PatriciaMap::new(); /// assert!(map.is_empty()); /// /// map.insert("foo", 10); /// assert_eq!(map.len(), 1); /// assert_eq!(map.get("foo"), Some(&10)); /// /// map.remove("foo"); /// assert_eq!(map.get("foo"), None); /// ``` pub fn new() -> Self { GenericPatriciaMap { tree: PatriciaTree::new(), _key: PhantomData, } } /// Clears this map, removing all values. 
/// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let mut map = PatriciaMap::new(); /// map.insert("foo", 1); /// map.clear(); /// assert!(map.is_empty()); /// ``` pub fn clear(&mut self) { self.tree.clear(); } /// Returns the number of elements in this map. /// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let mut map = PatriciaMap::new(); /// map.insert("foo", 1); /// map.insert("bar", 2); /// assert_eq!(map.len(), 2); /// ``` pub fn len(&self) -> usize { self.tree.len() } /// Returns `true` if this map contains no elements. /// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let mut map = PatriciaMap::new(); /// assert!(map.is_empty()); /// /// map.insert("foo", 1); /// assert!(!map.is_empty()); /// /// map.clear(); /// assert!(map.is_empty()); /// ``` pub fn is_empty(&self) -> bool { self.len() == 0 } #[cfg(feature = "serde")] pub(crate) fn from_node(node: Node) -> Self { Self { tree: node.into(), _key: PhantomData, } } #[cfg(any(test, feature = "serde"))] pub(crate) fn as_node(&self) -> &Node { self.tree.root() } #[cfg(test)] pub(crate) fn into_node(self) -> Node { self.tree.into_root() } } impl GenericPatriciaMap { /// Returns `true` if this map contains a value for the specified key. /// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let mut map = PatriciaMap::new(); /// map.insert("foo", 1); /// assert!(map.contains_key("foo")); /// assert!(!map.contains_key("bar")); /// ``` pub fn contains_key>(&self, key: Q) -> bool { self.tree.get(key.as_ref()).is_some() } /// Returns a reference to the value corresponding to the key. 
/// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let mut map = PatriciaMap::new(); /// map.insert("foo", 1); /// assert_eq!(map.get("foo"), Some(&1)); /// assert_eq!(map.get("bar"), None); /// ``` pub fn get>(&self, key: Q) -> Option<&V> { self.tree.get(key.as_ref()) } /// Returns a mutable reference to the value corresponding to the key. /// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let mut map = PatriciaMap::new(); /// map.insert("foo", 1); /// map.get_mut("foo").map(|v| *v = 2); /// assert_eq!(map.get("foo"), Some(&2)); /// ``` pub fn get_mut>(&mut self, key: Q) -> Option<&mut V> { self.tree.get_mut(key.as_ref()) } /// Finds the longest common prefix of `key` and the keys in this map, /// and returns a reference to the entry whose key matches the prefix. /// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let mut map = PatriciaMap::new(); /// map.insert("foo", 1); /// map.insert("foobar", 2); /// assert_eq!(map.get_longest_common_prefix("fo"), None); /// assert_eq!(map.get_longest_common_prefix("foo"), Some(("foo".as_bytes(), &1))); /// assert_eq!(map.get_longest_common_prefix("fooba"), Some(("foo".as_bytes(), &1))); /// assert_eq!(map.get_longest_common_prefix("foobar"), Some(("foobar".as_bytes(), &2))); /// assert_eq!(map.get_longest_common_prefix("foobarbaz"), Some(("foobar".as_bytes(), &2))); /// ``` pub fn get_longest_common_prefix<'a, Q>(&self, key: &'a Q) -> Option<(&'a K::Borrowed, &V)> where Q: ?Sized + AsRef, { let (key, value) = self.tree.get_longest_common_prefix(key.as_ref())?; Some((K::Borrowed::from_bytes(key), value)) } /// Finds the longest common prefix of `key` and the keys in this map, /// and returns a mutable reference to the entry whose key matches the prefix. 
/// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let mut map = PatriciaMap::new(); /// map.insert("foo", 1); /// map.insert("foobar", 2); /// assert_eq!(map.get_longest_common_prefix_mut("fo"), None); /// assert_eq!(map.get_longest_common_prefix_mut("foo"), Some(("foo".as_bytes(), &mut 1))); /// *map.get_longest_common_prefix_mut("foo").unwrap().1 = 3; /// assert_eq!(map.get_longest_common_prefix_mut("fooba"), Some(("foo".as_bytes(), &mut 3))); /// assert_eq!(map.get_longest_common_prefix_mut("foobar"), Some(("foobar".as_bytes(), &mut 2))); /// *map.get_longest_common_prefix_mut("foobar").unwrap().1 = 4; /// assert_eq!(map.get_longest_common_prefix_mut("foobarbaz"), Some(("foobar".as_bytes(), &mut 4))); /// ``` pub fn get_longest_common_prefix_mut<'a, Q>( &mut self, key: &'a Q, ) -> Option<(&'a K::Borrowed, &mut V)> where Q: ?Sized + AsRef, { let (key, value) = self.tree.get_longest_common_prefix_mut(key.as_ref())?; Some((K::Borrowed::from_bytes(key), value)) } /// Returns the longest common prefix length of `key` and the keys in this map. /// /// Unlike `get_longest_common_prefix()`, this method does not check if there is a key that matches the prefix in this map. /// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let mut map = PatriciaMap::new(); /// map.insert("foo", 1); /// map.insert("foobar", 2); /// assert_eq!(map.longest_common_prefix_len("fo"), 2); /// assert_eq!(map.longest_common_prefix_len("foo"), 3); /// assert_eq!(map.longest_common_prefix_len("fooba"), 5); /// assert_eq!(map.longest_common_prefix_len("foobar"), 6); /// assert_eq!(map.longest_common_prefix_len("foobarbaz"), 6); /// assert_eq!(map.longest_common_prefix_len("foba"), 2); /// ``` pub fn longest_common_prefix_len(&self, key: &Q) -> usize where Q: ?Sized + AsRef, { self.tree.longest_common_prefix_len(key.as_ref()) } /// Inserts a key-value pair into this map. /// /// If the map did not have this key present, `None` is returned. 
/// If the map did have this key present, the value is updated, and the old value is returned. /// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let mut map = PatriciaMap::new(); /// assert_eq!(map.insert("foo", 1), None); /// assert_eq!(map.get("foo"), Some(&1)); /// assert_eq!(map.insert("foo", 2), Some(1)); /// assert_eq!(map.get("foo"), Some(&2)); /// ``` pub fn insert>(&mut self, key: Q, value: V) -> Option { self.tree.insert(key.as_ref(), value) } /// Removes a key from this map, returning the value at the key if the key was previously in it. /// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let mut map = PatriciaMap::new(); /// map.insert("foo", 1); /// assert_eq!(map.remove("foo"), Some(1)); /// assert_eq!(map.remove("foo"), None); /// ``` pub fn remove>(&mut self, key: Q) -> Option { self.tree.remove(key.as_ref()) } /// Returns an iterator that collects all entries in the map up to a certain key. /// /// # Example /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let mut t = PatriciaMap::new(); /// t.insert("a", vec!["a"]); /// t.insert("x", vec!["x"]); /// t.insert("ab", vec!["b"]); /// t.insert("abc", vec!["c"]); /// t.insert("abcd", vec!["d"]); /// t.insert("abcdf", vec!["f"]); /// assert!(t /// .common_prefixes(b"abcde") /// .map(|(_, v)| v) /// .flatten() /// .eq(vec![&"a", &"b", &"c", &"d"].into_iter())); /// ``` pub fn common_prefixes<'a, 'b, Q>( &'a self, key: &'b Q, ) -> CommonPrefixesIter<'a, 'b, K::Borrowed, V> where Q: ?Sized + AsRef, { CommonPrefixesIter { key_bytes: key.as_ref().as_bytes(), iterator: self.tree.common_prefixes(key.as_ref()), } } /// Returns an iterator that collects all values of entries in the map up to a certain key. 
///
/// # Example
///
/// ```
/// use patricia_tree::PatriciaMap;
/// let mut t = PatriciaMap::new();
/// t.insert("a", vec!["a"]);
/// t.insert("x", vec!["x"]);
/// t.insert("ab", vec!["b"]);
/// t.insert("abc", vec!["c"]);
/// t.insert("abcd", vec!["d"]);
/// t.insert("abcdf", vec!["f"]);
/// assert!(t
///     .common_prefix_values(b"abcde")
///     .flatten()
///     .eq(vec![&"a", &"b", &"c", &"d"].into_iter()));
/// ```
pub fn common_prefix_values<'a, 'b, Q>(
    &'a self,
    key: &'b Q,
) -> impl Iterator<Item = &'a V> + use<'a, 'b, Q, K, V>
where
    Q: ?Sized + AsRef<K::Borrowed>,
    <K as Bytes>::Borrowed: 'b,
{
    self.tree
        .common_prefixes(key.as_ref())
        // Nodes on the path that carry no value are skipped.
        .filter_map(|(_, n)| n.value())
}

/// Splits the map into two at the given prefix.
///
/// The returned map contains all the entries whose keys are prefixed by `prefix`;
/// those entries are removed from `self`.
///
/// # Examples
///
/// ```
/// use patricia_tree::PatriciaMap;
///
/// let mut a = PatriciaMap::new();
/// a.insert("rust", 1);
/// a.insert("ruby", 2);
/// a.insert("bash", 3);
/// a.insert("erlang", 4);
/// a.insert("elixir", 5);
///
/// let b = a.split_by_prefix("e");
/// assert_eq!(a.len(), 3);
/// assert_eq!(b.len(), 2);
///
/// assert_eq!(a.keys().collect::<Vec<_>>(), [b"bash", b"ruby", b"rust"]);
/// assert_eq!(b.keys().collect::<Vec<_>>(), [b"elixir", b"erlang"]);
/// ```
pub fn split_by_prefix<Q: AsRef<K::Borrowed>>(&mut self, prefix: Q) -> Self {
    let subtree = self.tree.split_by_prefix(prefix.as_ref());
    GenericPatriciaMap {
        tree: subtree,
        _key: PhantomData,
    }
}

/// Gets an iterator over the entries of this map, sorted by key.
///
/// # Examples
///
/// ```
/// use patricia_tree::PatriciaMap;
///
/// let map: PatriciaMap<_> =
///     vec![("foo", 1), ("bar", 2), ("baz", 3)].into_iter().collect();
/// assert_eq!(vec![(Vec::from("bar"), &2), ("baz".into(), &3), ("foo".into(), &1)],
///            map.iter().collect::<Vec<_>>());
/// ```
pub fn iter(&self) -> Iter<K, V> {
    // An empty initial key: iteration starts from the root of the tree.
    Iter::new(self.tree.nodes(), Vec::new())
}

/// Gets a mutable iterator over the entries of this map, sorted by key.
/// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let mut map: PatriciaMap<_> = /// vec![("foo", 1), ("bar", 2), ("baz", 3)].into_iter().collect(); /// for (_, v) in map.iter_mut() { /// *v += 10; /// } /// assert_eq!(map.get("bar"), Some(&12)); /// ``` pub fn iter_mut(&mut self) -> IterMut { IterMut::new(self.tree.nodes_mut(), Vec::new()) } /// Gets an iterator over the keys of this map, in sorted order. /// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let map: PatriciaMap<_> = /// vec![("foo", 1), ("bar", 2), ("baz", 3)].into_iter().collect(); /// assert_eq!(vec![Vec::from("bar"), "baz".into(), "foo".into()], /// map.keys().collect::>()); /// ``` pub fn keys(&self) -> Keys { Keys(self.iter()) } /// Gets an iterator over the values of this map, in order by key. /// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let map: PatriciaMap<_> = /// vec![("foo", 1), ("bar", 2), ("baz", 3)].into_iter().collect(); /// assert_eq!(vec![2, 3, 1], /// map.values().cloned().collect::>()); /// ``` pub fn values(&self) -> Values { Values { nodes: self.tree.nodes(), } } /// Gets a mutable iterator over the values of this map, in order by key. /// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let mut map: PatriciaMap<_> = /// vec![("foo", 1), ("bar", 2), ("baz", 3)].into_iter().collect(); /// for v in map.values_mut() { /// *v += 10; /// } /// assert_eq!(vec![12, 13, 11], /// map.values().cloned().collect::>()); /// ``` pub fn values_mut(&mut self) -> ValuesMut { ValuesMut { nodes: self.tree.nodes_mut(), } } } impl GenericPatriciaMap { /// Gets an iterator over the entries having the given prefix of this map, sorted by key. 
/// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let map: PatriciaMap<_> = /// vec![("foo", 1), ("bar", 2), ("baz", 3)].into_iter().collect(); /// assert_eq!(vec![(Vec::from("bar"), &2), ("baz".into(), &3)], /// map.iter_prefix(b"ba").collect::>()); /// ``` pub fn iter_prefix<'a, 'b>( &'a self, prefix: &'b K::Borrowed, ) -> impl Iterator + use<'a, 'b, K, V> { self.tree .iter_prefix(prefix) .into_iter() .flat_map(move |(prefix_len, nodes)| { Iter::::new(nodes, Vec::from(&prefix.as_bytes()[..prefix_len])) }) } /// Gets a mutable iterator over the entries having the given prefix of this map, sorted by key. /// /// # Examples /// /// ``` /// use patricia_tree::PatriciaMap; /// /// let mut map: PatriciaMap<_> = /// vec![("foo", 1), ("bar", 2), ("baz", 3)].into_iter().collect(); /// assert_eq!(vec![(Vec::from("bar"), &mut 2), ("baz".into(), &mut 3)], /// map.iter_prefix_mut(b"ba").collect::>()); /// ``` pub fn iter_prefix_mut<'a, 'b>( &'a mut self, prefix: &'b K::Borrowed, ) -> impl Iterator + use<'a, 'b, K, V> { self.tree .iter_prefix_mut(prefix) .into_iter() .flat_map(move |(prefix_len, nodes)| { IterMut::::new(nodes, Vec::from(&prefix.as_bytes()[..prefix_len])) }) } } impl fmt::Debug for GenericPatriciaMap { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_map().entries(self.iter()).finish() } } impl Clone for GenericPatriciaMap { fn clone(&self) -> Self { Self { tree: self.tree.clone(), _key: PhantomData, } } } impl Default for GenericPatriciaMap { fn default() -> Self { Self::new() } } impl IntoIterator for GenericPatriciaMap { type Item = (K, V); type IntoIter = IntoIter; fn into_iter(self) -> Self::IntoIter { IntoIter { nodes: self.tree.into_nodes(), key_bytes: Vec::new(), _key: PhantomData, } } } impl FromIterator<(Q, V)> for GenericPatriciaMap where K: Bytes, Q: AsRef, { fn from_iter(iter: I) -> Self where I: IntoIterator, { let mut map = GenericPatriciaMap::new(); for (k, v) in iter { map.insert(k, v); } map } } impl 
Extend<(Q, V)> for GenericPatriciaMap where K: Bytes, Q: AsRef, { fn extend(&mut self, iter: I) where I: IntoIterator, { for (k, v) in iter { self.insert(k, v); } } } /// An iterator over a `PatriciaMap`'s entries. #[derive(Debug)] pub struct Iter<'a, K, V: 'a> { nodes: tree::Nodes<'a, V>, key_bytes: Vec, key_offset: usize, _key: PhantomData, } impl<'a, K, V: 'a> Iter<'a, K, V> { fn new(nodes: tree::Nodes<'a, V>, key: Vec) -> Self { let key_offset = key.len(); Self { nodes, key_bytes: key, key_offset, _key: PhantomData, } } } impl<'a, K: Bytes, V: 'a> Iterator for Iter<'a, K, V> { type Item = (K, &'a V); fn next(&mut self) -> Option { for (key_len, node) in &mut self.nodes { self.key_bytes.truncate(self.key_offset + key_len); self.key_bytes.extend(node.label()); if let Some(value) = node.value() { return Some((K::Borrowed::from_bytes(&self.key_bytes).to_owned(), value)); } } None } } /// An owning iterator over a `PatriciaMap`'s entries. #[derive(Debug)] pub struct IntoIter { nodes: tree::IntoNodes, key_bytes: Vec, _key: PhantomData, } impl Iterator for IntoIter { type Item = (K, V); fn next(&mut self) -> Option { for (key_len, mut node) in &mut self.nodes { self.key_bytes.truncate(key_len); self.key_bytes.extend(node.label()); if let Some(value) = node.take_value() { return Some((K::Borrowed::from_bytes(&self.key_bytes).to_owned(), value)); } } None } } /// A mutable iterator over a `PatriciaMap`'s entries. 
#[derive(Debug)]
pub struct IterMut<'a, K, V: 'a> {
    nodes: tree::NodesMut<'a, V>,
    // Scratch buffer holding the key bytes of the entry currently being visited.
    key_bytes: Vec<u8>,
    // Length of the fixed prefix that `key_bytes` was seeded with in `new()`.
    key_offset: usize,
    _key: PhantomData<K>,
}

impl<'a, K, V: 'a> IterMut<'a, K, V> {
    /// Makes an iterator over `nodes` whose yielded keys all start with `key`.
    fn new(nodes: tree::NodesMut<'a, V>, key: Vec<u8>) -> Self {
        let key_offset = key.len();
        Self {
            nodes,
            key_bytes: key,
            key_offset,
            _key: PhantomData,
        }
    }
}

impl<'a, K: Bytes, V: 'a> Iterator for IterMut<'a, K, V> {
    type Item = (K, &'a mut V);

    fn next(&mut self) -> Option<Self::Item> {
        for (key_len, node) in &mut self.nodes {
            // Cut the buffer back to the seeded prefix plus `key_len` bytes, then append
            // this node's label.
            // NOTE(review): `key_len` is presumably the accumulated label length of the
            // node's ancestors as reported by `tree::NodesMut` -- confirm in `tree.rs`.
            self.key_bytes.truncate(self.key_offset + key_len);
            self.key_bytes.extend(node.label());
            if let Some(value) = node.into_value_mut() {
                return Some((K::Borrowed::from_bytes(&self.key_bytes).to_owned(), value));
            }
        }
        None
    }
}

/// An iterator over a `PatriciaMap`'s keys.
#[derive(Debug)]
pub struct Keys<'a, K, V: 'a>(Iter<'a, K, V>);

impl<'a, K: Bytes, V: 'a> Iterator for Keys<'a, K, V> {
    type Item = K;

    fn next(&mut self) -> Option<Self::Item> {
        self.0.next().map(|(k, _)| k)
    }
}

/// An iterator over a `PatriciaMap`'s values.
#[derive(Debug)]
pub struct Values<'a, V: 'a> {
    nodes: tree::Nodes<'a, V>,
}

impl<'a, V: 'a> Iterator for Values<'a, V> {
    type Item = &'a V;

    fn next(&mut self) -> Option<Self::Item> {
        // Skip nodes that do not carry a value.
        self.nodes.find_map(|(_, node)| node.value())
    }
}

/// A mutable iterator over a `PatriciaMap`'s values.
#[derive(Debug)]
pub struct ValuesMut<'a, V: 'a> {
    nodes: tree::NodesMut<'a, V>,
}

impl<'a, V: 'a> Iterator for ValuesMut<'a, V> {
    type Item = &'a mut V;

    fn next(&mut self) -> Option<Self::Item> {
        // Skip nodes that do not carry a value.
        self.nodes.find_map(|(_, node)| node.into_value_mut())
    }
}

/// An iterator over entries in a `PatriciaMap` that share a common prefix with
/// a given key.
#[derive(Debug)] pub struct CommonPrefixesIter<'a, 'b, K: ?Sized, V> { key_bytes: &'b [u8], iterator: node::CommonPrefixesIter<'a, 'b, K, V>, } impl<'a, 'b, K, V> Iterator for CommonPrefixesIter<'a, 'b, K, V> where K: 'b + ?Sized + BorrowedBytes, { type Item = (&'b K, &'a V); fn next(&mut self) -> Option { for (prefix_len, n) in self.iterator.by_ref() { if let Some(v) = n.value() { return Some((K::from_bytes(&self.key_bytes[..prefix_len]), v)); } } None } } #[cfg(test)] mod tests { use super::*; use rand::seq::SliceRandom; #[test] fn it_works() { let input = [ ("7", 7), ("43", 43), ("92", 92), ("37", 37), ("31", 31), ("21", 21), ("0", 0), ("35", 35), ("47", 47), ("82", 82), ("61", 61), ("9", 9), ]; let mut map = PatriciaMap::new(); for &(ref k, v) in input.iter() { assert_eq!(map.insert(k, v), None); assert_eq!(map.get(k), Some(&v)); } } #[test] fn debug_works() { let map: PatriciaMap<_> = vec![("foo", 1), ("bar", 2), ("baz", 3)] .into_iter() .collect(); assert_eq!( format!("{:?}", map), "{[98, 97, 114]: 2, [98, 97, 122]: 3, [102, 111, 111]: 1}" ); } #[test] fn clear_works() { let mut map = PatriciaMap::new(); assert!(map.is_empty()); map.insert("foo", 1); assert!(!map.is_empty()); map.clear(); assert!(map.is_empty()); } #[test] fn into_iter_works() { let map: PatriciaMap<_> = vec![("foo", 1), ("bar", 2), ("baz", 3)] .into_iter() .collect(); assert_eq!( map.into_iter().collect::>(), [(Vec::from("bar"), 2), ("baz".into(), 3), ("foo".into(), 1)] ); } #[test] fn iter_mut_works() { let mut map: PatriciaMap<_> = vec![("foo", 1), ("bar", 2), ("baz", 3)] .into_iter() .collect(); for (_key, x) in map.iter_mut() { (*x) *= 2; } assert_eq!( map.into_iter().collect::>(), [(Vec::from("bar"), 4), ("baz".into(), 6), ("foo".into(), 2)] ); } #[test] #[cfg_attr(miri, ignore)] fn large_map_works() { let mut input = (0..10000).map(|i| (i.to_string(), i)).collect::>(); input.shuffle(&mut rand::thread_rng()); // Insert let mut map = input.iter().cloned().collect::>(); 
assert_eq!(map.len(), input.len()); // Get for &(ref k, v) in input.iter() { assert_eq!(map.get(k), Some(&v)); } // Remove for &(ref k, v) in input.iter().take(input.len() / 2) { assert_eq!(map.remove(k), Some(v)); assert_eq!(map.remove(k), None); } for &(ref k, _) in input.iter().take(input.len() / 2) { assert_eq!(map.get(k), None); } for &(ref k, v) in input.iter().skip(input.len() / 2) { assert_eq!(map.get(k), Some(&v)); } // Insert for &(ref k, v) in input.iter().take(input.len() / 2) { assert_eq!(map.insert(k, v), None); } for &(ref k, v) in input.iter().skip(input.len() / 2) { assert_eq!(map.insert(k, v), Some(v)); } // Get for &(ref k, v) in input.iter() { assert_eq!(map.get(k), Some(&v)); } } #[test] fn test_common_word_prefixes() { let mut t = PatriciaMap::new(); t.insert(".com.foo.", vec!["b"]); t.insert(".", vec!["a"]); t.insert(".com.foo.bar.", vec!["c"]); t.insert("..", vec!["e"]); t.insert("x", vec!["d"]); let results = t .common_prefixes(b".com.foo.bar.baz.") .map(|(_, v)| v) .flatten() .cloned() .collect::>(); assert!(results.iter().eq(vec![&"a", &"b", &"c"].into_iter())); } #[test] fn test_letter_prefixes() { let mut t = PatriciaMap::new(); t.insert("x", vec!["x"]); t.insert("a", vec!["a"]); t.insert("ab", vec!["b"]); t.insert("abc", vec!["c"]); t.insert("abcd", vec!["d"]); t.insert("abcdf", vec!["f"]); let results = t .common_prefixes(b"abcde") .map(|(_, v)| v) .flatten() .cloned() .collect::>(); assert!(results.iter().eq(vec![&"a", &"b", &"c", &"d"].into_iter())); } #[test] fn test_common_prefixes() { let mut t = PatriciaMap::new(); t.insert("b", vec!["b"]); t.insert("a", vec!["a"]); t.insert("c", vec!["c"]); t.insert("..", vec!["e"]); t.insert("x", vec!["d"]); let results = t .common_prefixes(b"abc") .map(|(k, v)| { unsafe { println!("{:?}", std::str::from_utf8_unchecked(k)); } v }) .flatten() .cloned() .collect::>(); dbg!(&results); assert!(results.iter().eq(vec![&"a"].into_iter())); let mut t = PatriciaMap::new(); t.insert("ab", vec!["b"]); 
t.insert("a", vec!["a"]); t.insert("abc", vec!["c"]); t.insert("..", vec!["e"]); t.insert("x", vec!["d"]); let results = t .common_prefixes(b"abcd") .map(|(_, v)| v) .flatten() .cloned() .collect::>(); assert!(results.iter().eq(vec![&"a", &"b", &"c"].into_iter())); let mut list = PatriciaMap::new(); list.insert(b".com.foocatnetworks.".as_ref(), vec![0 as u16]); list.insert(b".com.foocatnetworks.foo.".as_ref(), vec![1]); list.insert(b".com.foocatnetworks.foo.baz.".as_ref(), vec![2]); list.insert(b".com.google.".as_ref(), vec![0]); list.insert(b".com.cisco.".as_ref(), vec![0]); list.insert(b".org.wikipedia.".as_ref(), vec![0]); let results = list .common_prefixes(b".com.foocatnetworks.foo.baz.") .map(|(_, v)| v) .flatten() .cloned() .collect::>(); assert!(vec![0 as u16, 1, 2].into_iter().eq(results.into_iter())); } #[test] fn string_patricia_map_works() { // Insert as bytes. let mut t = PatriciaMap::new(); t.insert("πŸŒπŸ—»", ()); // [240,159,140,143,240,159,151,187] t.insert("πŸŒπŸ”", ()); // [240,159,140,143,240,159,141,148] let first_label = t.as_node().child().unwrap().label(); assert!(std::str::from_utf8(first_label).is_err()); assert_eq!(first_label, [240, 159, 140, 143, 240, 159]); // Insert as string. 
let mut t = StringPatriciaMap::new(); t.insert("πŸŒπŸ—»", ()); t.insert("πŸŒπŸ”", ()); let first_label = t.as_node().child().unwrap().label(); assert_eq!(std::str::from_utf8(first_label).ok(), Some("🌏")); } #[test] fn issue21() { let mut map = PatriciaMap::new(); map.insert("1", 0); map.insert("2", 0); map.remove("2"); map.insert("2", 0); assert_eq!(map.len(), map.iter().count()); assert_eq!(map.len(), map.iter_mut().count()); } #[test] fn issue35() { let mut map = StringPatriciaMap::::new(); map.insert("むンターポール", 1); map.insert("むンターポル", 2); map.insert("むンターγƒͺγƒΌγƒ–", 3); map.insert("むンターン", 4); assert_eq!(map.get("むンターポール"), Some(&1)); assert_eq!(map.get("むンターポル"), Some(&2)); } #[test] fn issue42_iter_prefix() { let mut map = StringPatriciaMap::new(); map.insert("a0/b0", 0); map.insert("a1/b1", 0); let items: Vec<_> = { let prefix = "a0".to_owned(); map.iter_prefix(&prefix).collect() }; assert_eq!(items, vec![("a0/b0".to_owned(), &0)]) } #[test] fn issue42_iter_prefix_mut() { let mut map = StringPatriciaMap::new(); map.insert("a0/b0", 0); map.insert("a1/b1", 0); let items: Vec<_> = { let prefix = "a0".to_owned(); map.iter_prefix_mut(&prefix).collect() }; assert_eq!(items, vec![("a0/b0".to_owned(), &mut 0)]) } #[test] fn issue42_common_prefix_values() { let mut map = StringPatriciaMap::new(); map.insert("a0/b0", 0); map.insert("a1/b1", 0); let items: Vec<_> = { let prefix = "a0/b0/c0".to_owned(); map.common_prefix_values(&prefix).collect() }; assert_eq!(items, vec![&0]) } } patricia_tree-0.9.0/src/node.rs000064400000000000000000001125001046102023000145260ustar 00000000000000//! A node which represents a subtree of a patricia tree. use crate::BorrowedBytes; use std::alloc::{alloc, dealloc, handle_alloc_error, Layout}; use std::marker::PhantomData; use std::mem; use std::ptr; use std::slice; macro_rules! assert_some { ($expr:expr) => { if let Some(value) = $expr { value } else { panic!("`{}` must be `Some(..)`", stringify!($expr)); } }; } bitflags! 
{ #[derive(Clone, Copy)] pub (crate) struct Flags: u8 { const VALUE_ALLOCATED = 0b0000_0001; const VALUE_INITIALIZED = 0b0000_0010; const CHILD_ALLOCATED = 0b0000_0100; const CHILD_INITIALIZED = 0b0000_1000; const SIBLING_ALLOCATED = 0b0001_0000; const SIBLING_INITIALIZED = 0b0010_0000; } } const FLAGS_OFFSET: isize = 0; const LABEL_LEN_OFFSET: isize = 1; const LABEL_OFFSET: isize = 2; const MAX_LABEL_LEN: usize = 255; /// A node which represents a subtree of a patricia tree. /// /// Note that this is a low level building block. /// Usually it is recommended to use more high level data structures (e.g., `PatriciaTree`). #[derive(Debug)] pub struct Node { // layout: // - flags: u8 // - label_len: u8 // - label: [u8; label_len] // - value: Option // - child: Option> // - sibling: Option> ptr: *mut u8, _value: PhantomData, } unsafe impl Send for Node {} unsafe impl Sync for Node {} impl Node { /// Makes a new node which represents an empty tree. pub fn root() -> Self { Node::new(b"", None, None, None) } /// Makes a new node. 
pub fn new(
    mut label: &[u8],
    mut value: Option<V>,
    mut child: Option<Self>,
    sibling: Option<Self>,
) -> Self {
    // A node stores its label length in a single byte. A longer label is split:
    // this node keeps the first `MAX_LABEL_LEN` bytes and no value, and the remainder
    // (together with the value and the original child) moves into a new child node.
    if label.len() > MAX_LABEL_LEN {
        child = Some(Node::new(&label[MAX_LABEL_LEN..], value, child, None));
        label = &label[..MAX_LABEL_LEN];
        value = None;
    }

    // Only the slots that are actually present are allocated; the flags record which.
    let mut flags = Flags::empty();
    let mut layout = Self::initial_layout(label.len());
    let value = value.map(|value| {
        flags.insert(Flags::VALUE_ALLOCATED | Flags::VALUE_INITIALIZED);
        let (new_layout, offset) = layout.extend(Layout::new::<V>()).expect("unreachable");
        layout = new_layout;
        (value, offset)
    });
    let child = child.map(|child| {
        flags.insert(Flags::CHILD_ALLOCATED | Flags::CHILD_INITIALIZED);
        let (new_layout, offset) = layout.extend(Layout::new::<Self>()).expect("unreachable");
        layout = new_layout;
        (child, offset)
    });
    let sibling = sibling.map(|sibling| {
        flags.insert(Flags::SIBLING_ALLOCATED | Flags::SIBLING_INITIALIZED);
        let (new_layout, offset) = layout.extend(Layout::new::<Self>()).expect("unreachable");
        layout = new_layout;
        (sibling, offset)
    });

    // Pad once so that the allocation and the error report use the very same layout.
    let layout = layout.pad_to_align();

    // SAFETY: `layout` has a non-zero size (it always contains the flags and label-length
    // bytes). Every write below targets an offset computed from that same layout, so it
    // stays inside the allocation and is aligned for the written type.
    unsafe {
        let ptr = alloc(layout);
        if ptr.is_null() {
            handle_alloc_error(layout)
        }

        ptr::write(ptr.offset(FLAGS_OFFSET), flags.bits());
        // Cannot truncate: `label.len() <= MAX_LABEL_LEN` is guaranteed by the split above.
        ptr::write(ptr.offset(LABEL_LEN_OFFSET), label.len() as u8);
        ptr::copy_nonoverlapping(label.as_ptr(), ptr.offset(LABEL_OFFSET), label.len());

        if let Some((value, offset)) = value {
            ptr::write(ptr.add(offset) as _, value);
        }
        if let Some((child, offset)) = child {
            ptr::write(ptr.add(offset) as _, child);
        }
        if let Some((sibling, offset)) = sibling {
            ptr::write(ptr.add(offset) as _, sibling);
        }
        Node {
            ptr,
            _value: PhantomData,
        }
    }
}

/// Allocates a node for decoding, reserving a slot for every part that `flags` marks
/// as initialized.
///
/// Only the `*_ALLOCATED` flags and the label length are written here: the label bytes
/// and the value/child/sibling slots are left unwritten.
// NOTE(review): presumably the serde decoder fills the label through `label_mut()` and
// the slots through `set_value()`/`set_child()`/`set_sibling()` -- confirm at the caller.
#[cfg(feature = "serde")]
pub(crate) fn new_for_decoding(flags: Flags, label_len: u8) -> Self {
    let mut init_flags = Flags::empty();
    let mut layout = Self::initial_layout(label_len as usize);
    if flags.contains(Flags::VALUE_INITIALIZED) {
        init_flags.insert(Flags::VALUE_ALLOCATED);
        layout = layout.extend(Layout::new::<V>()).expect("unreachable").0;
    }
    if flags.contains(Flags::CHILD_INITIALIZED) {
        init_flags.insert(Flags::CHILD_ALLOCATED);
        layout = layout.extend(Layout::new::<Self>()).expect("unreachable").0;
    }
    if flags.contains(Flags::SIBLING_INITIALIZED) {
        init_flags.insert(Flags::SIBLING_ALLOCATED);
        layout = layout.extend(Layout::new::<Self>()).expect("unreachable").0;
    }

    let layout = layout.pad_to_align();
    // SAFETY: `layout` has a non-zero size (flags byte + label-length byte at least).
    let ptr = unsafe { alloc(layout) };
    if ptr.is_null() {
        // Report allocation failure the same way `Node::new()` does.
        handle_alloc_error(layout);
    }
    // SAFETY: `ptr` is non-null and both offsets lie within the header of the allocation.
    unsafe {
        ptr::write(ptr.offset(FLAGS_OFFSET), init_flags.bits());
        ptr::write(ptr.offset(LABEL_LEN_OFFSET), label_len);
    }
    Node {
        ptr,
        _value: PhantomData,
    }
}

/// Returns the label of this node.
pub fn label(&self) -> &[u8] {
    // SAFETY: the label length is stored at `LABEL_LEN_OFFSET` and the label bytes
    // follow at `LABEL_OFFSET` within this node's allocation.
    unsafe {
        let label_len = *self.ptr.offset(LABEL_LEN_OFFSET) as usize;
        slice::from_raw_parts(self.ptr.offset(LABEL_OFFSET), label_len)
    }
}

/// Returns the mutable label bytes of this node.
#[cfg(feature = "serde")]
pub(crate) fn label_mut(&mut self) -> &mut [u8] {
    // SAFETY: same layout argument as in `label()`; `&mut self` gives exclusive access.
    unsafe {
        let label_len = *self.ptr.offset(LABEL_LEN_OFFSET) as usize;
        slice::from_raw_parts_mut(self.ptr.offset(LABEL_OFFSET), label_len)
    }
}

/// Returns the reference to the value of this node.
pub fn value(&self) -> Option<&V> {
    let offset = self.value_offset()?;
    if !self.flags().contains(Flags::VALUE_INITIALIZED) {
        return None;
    }
    // SAFETY: the slot at `offset` is allocated (`value_offset()` returned `Some`) and
    // `VALUE_INITIALIZED` says a `V` has been written into it.
    Some(unsafe { &*(self.ptr.offset(offset) as *const V) })
}

/// Returns the mutable reference to the value of this node.
pub fn value_mut(&mut self) -> Option<&mut V> {
    let offset = self.value_offset()?;
    if !self.flags().contains(Flags::VALUE_INITIALIZED) {
        return None;
    }
    // SAFETY: see `value()`; `&mut self` gives exclusive access to the slot.
    Some(unsafe { &mut *(self.ptr.offset(offset) as *mut V) })
}

/// Returns the reference to the child of this node.
pub fn child(&self) -> Option<&Self> {
    let offset = self.child_offset()?;
    if !self.flags().contains(Flags::CHILD_INITIALIZED) {
        return None;
    }
    // SAFETY: the slot at `offset` is allocated (`child_offset()` returned `Some`) and
    // `CHILD_INITIALIZED` says a `Node` has been written into it.
    Some(unsafe { &*(self.ptr.offset(offset) as *const Self) })
}

/// Returns the mutable reference to the child of this node.
pub fn child_mut(&mut self) -> Option<&mut Self> {
    let offset = self.child_offset()?;
    if !self.flags().contains(Flags::CHILD_INITIALIZED) {
        return None;
    }
    // SAFETY: the child slot is allocated (`child_offset()` returned `Some`) and flagged
    // as initialized; `&mut self` gives exclusive access to it.
    Some(unsafe { &mut *(self.ptr.offset(offset) as *mut Self) })
}

/// Returns a shared reference to this node's sibling, if it has one.
pub fn sibling(&self) -> Option<&Self> {
    let offset = self.sibling_offset()?;
    if !self.flags().contains(Flags::SIBLING_INITIALIZED) {
        return None;
    }
    // SAFETY: the sibling slot is allocated (`sibling_offset()` returned `Some`) and
    // flagged as initialized.
    Some(unsafe { &*(self.ptr.offset(offset) as *const Self) })
}

/// Returns a mutable reference to this node's sibling, if it has one.
pub fn sibling_mut(&mut self) -> Option<&mut Self> {
    let offset = self.sibling_offset()?;
    if !self.flags().contains(Flags::SIBLING_INITIALIZED) {
        return None;
    }
    // SAFETY: see `sibling()`; `&mut self` gives exclusive access to the slot.
    Some(unsafe { &mut *(self.ptr.offset(offset) as *mut Self) })
}

/// Borrows the parts of this node all at once: its label together with mutable
/// references to its value, child and sibling (each `None` when not initialized).
pub fn as_mut(&mut self) -> NodeMut<'_, V> {
    let base = self.ptr;
    let flags = self.flags();

    // SAFETY (all three blocks): each slot is only dereferenced when its offset exists
    // and its `*_INITIALIZED` flag is set. The slots sit at distinct offsets of the
    // allocation, so the resulting mutable references do not alias each other.
    let child = self
        .child_offset()
        .filter(|_| flags.contains(Flags::CHILD_INITIALIZED))
        .map(|offset| unsafe { &mut *(base.offset(offset) as *mut Self) });
    let sibling = self
        .sibling_offset()
        .filter(|_| flags.contains(Flags::SIBLING_INITIALIZED))
        .map(|offset| unsafe { &mut *(base.offset(offset) as *mut Self) });
    let value = self
        .value_offset()
        .filter(|_| flags.contains(Flags::VALUE_INITIALIZED))
        .map(|offset| unsafe { &mut *(base.offset(offset) as *mut V) });

    NodeMut {
        label: self.label(),
        sibling,
        child,
        value,
    }
}

/// Takes the value out of this node.
pub fn take_value(&mut self) -> Option<V> {
    let offset = self.value_offset()?;
    if !self.flags().contains(Flags::VALUE_INITIALIZED) {
        return None;
    }
    // Mark the slot as empty before moving the value out of it.
    self.set_flags(Flags::VALUE_INITIALIZED, false);
    // SAFETY: the slot is allocated and held an initialized `V`; with the flag cleared
    // above, the moved-out bytes are never read as a `V` again.
    Some(unsafe { ptr::read(self.ptr.offset(offset) as *const V) })
}

/// Takes the child out of this node.
pub fn take_child(&mut self) -> Option<Self> {
    let offset = self.child_offset()?;
    if !self.flags().contains(Flags::CHILD_INITIALIZED) {
        return None;
    }
    self.set_flags(Flags::CHILD_INITIALIZED, false);
    // SAFETY: the slot is allocated and held an initialized `Node`; the flag was cleared
    // above, so ownership moves to the caller exactly once.
    Some(unsafe { ptr::read(self.ptr.offset(offset) as *mut Self) })
}

/// Takes the sibling out of this node.
pub fn take_sibling(&mut self) -> Option<Self> {
    let offset = self.sibling_offset()?;
    if !self.flags().contains(Flags::SIBLING_INITIALIZED) {
        return None;
    }
    self.set_flags(Flags::SIBLING_INITIALIZED, false);
    // SAFETY: see `take_child()`.
    Some(unsafe { ptr::read(self.ptr.offset(offset) as *mut Self) })
}

/// Sets the value of this node.
///
/// Any previous value is dropped. If this node was allocated without a value slot,
/// the node is rebuilt with one.
pub fn set_value(&mut self, value: V) {
    // Drop the previous value (if any) so that the slot can be overwritten.
    self.take_value();
    match self.value_offset() {
        Some(offset) => {
            self.set_flags(Flags::VALUE_INITIALIZED, true);
            // SAFETY: the value slot is allocated and currently holds no live `V`.
            unsafe { ptr::write(self.ptr.offset(offset) as _, value) };
        }
        None => {
            // No slot available: move everything into a freshly laid-out node.
            let child = self.take_child();
            let sibling = self.take_sibling();
            let node = Node::new(self.label(), Some(value), child, sibling);
            *self = node;
        }
    }
}

/// Sets the child of this node.
///
/// Any previous child is dropped. If this node was allocated without a child slot,
/// the node is rebuilt with one.
pub fn set_child(&mut self, child: Self) {
    // Drop the previous child (if any) so that the slot can be overwritten.
    self.take_child();
    match self.child_offset() {
        Some(offset) => {
            self.set_flags(Flags::CHILD_INITIALIZED, true);
            // SAFETY: the child slot is allocated and currently holds no live `Node`.
            unsafe { ptr::write(self.ptr.offset(offset) as _, child) };
        }
        None => {
            // No slot available: move everything into a freshly laid-out node.
            let value = self.take_value();
            let sibling = self.take_sibling();
            let node = Node::new(self.label(), value, Some(child), sibling);
            *self = node;
        }
    }
}

/// Sets the sibling of this node.
pub fn set_sibling(&mut self, sibling: Self) { self.take_sibling(); if let Some(offset) = self.sibling_offset() { self.set_flags(Flags::SIBLING_INITIALIZED, true); unsafe { ptr::write(self.ptr.offset(offset) as _, sibling) }; } else { let value = self.take_value(); let child = self.take_child(); let node = Node::new(self.label(), value, child, Some(sibling)); *self = node; } } /// Gets an iterator which traverses the nodes in this tree, in depth first order. pub fn iter(&self) -> Iter { Iter { stack: vec![(0, self)], } } /// Gets a mutable iterator which traverses the nodes in this tree, in depth first order. pub fn iter_mut(&mut self) -> IterMut { IterMut { stack: vec![(0, self)], } } pub(crate) fn iter_descendant(&self) -> Iter { Iter { stack: vec![(0, self)], } } pub(crate) fn iter_descendant_mut(&mut self) -> IterMut { IterMut { stack: vec![(0, self)], } } pub(crate) fn common_prefixes<'a, 'b, K>( &'a self, key: &'b K, ) -> CommonPrefixesIter<'a, 'b, K, V> where K: ?Sized + BorrowedBytes, { CommonPrefixesIter { key, stack: vec![(0, self)], } } pub(crate) fn get(&self, key: &K) -> Option<&V> { let (next, common_prefix_len) = key.strip_common_prefix_and_len(self.label()); if common_prefix_len == self.label().len() { if next.is_empty() { self.value() } else { self.child().and_then(|child| child.get(next)) } } else if common_prefix_len == 0 && key.cmp_first_item(self.label()).is_ge() { self.sibling().and_then(|sibling| sibling.get(next)) } else { None } } pub(crate) fn get_mut(&mut self, key: &K) -> Option<&mut V> { let (next, common_prefix_len) = key.strip_common_prefix_and_len(self.label()); if common_prefix_len == self.label().len() { if next.is_empty() { self.value_mut() } else { self.child_mut().and_then(|child| child.get_mut(next)) } } else if common_prefix_len == 0 && key.cmp_first_item(self.label()).is_ge() { self.sibling_mut().and_then(|sibling| sibling.get_mut(next)) } else { None } } pub(crate) fn longest_common_prefix_len( &self, key: &K, offset: usize, 
) -> usize { let (next, common_prefix_len) = key.strip_common_prefix_and_len(self.label()); let next_offset = offset + common_prefix_len; if common_prefix_len == self.label().len() { if next.is_empty() { next_offset } else { self.child() .map(|child| child.longest_common_prefix_len(next, next_offset)) .unwrap_or(next_offset) } } else if common_prefix_len == 0 && key.cmp_first_item(self.label()).is_ge() { self.sibling() .map(|sibling| sibling.longest_common_prefix_len(next, offset)) .unwrap_or(next_offset) } else { next_offset } } pub(crate) fn get_longest_common_prefix( &self, key: &K, offset: usize, ) -> Option<(usize, &V)> { let (next, common_prefix_len) = key.strip_common_prefix_and_len(self.label()); if common_prefix_len == self.label().len() { let offset = offset + common_prefix_len; if next.is_empty() { self.value().map(|v| (offset, v)) } else { self.child() .and_then(|child| child.get_longest_common_prefix(next, offset)) .or_else(|| self.value().map(|v| (offset, v))) } } else if common_prefix_len == 0 && key.cmp_first_item(self.label()).is_ge() { self.sibling() .and_then(|sibling| sibling.get_longest_common_prefix(next, offset)) } else { None } } pub(crate) fn get_longest_common_prefix_mut( &mut self, key: &K, offset: usize, ) -> Option<(usize, &mut V)> { let (next, common_prefix_len) = key.strip_common_prefix_and_len(self.label()); if common_prefix_len == self.label().len() { let offset = offset + common_prefix_len; if next.is_empty() { self.value_mut().map(|v| (offset, v)) } else { let this = self.as_mut(); this.child .and_then(|child| child.get_longest_common_prefix_mut(next, offset)) .or_else(|| this.value.map(|v| (offset, v))) } } else if common_prefix_len == 0 && key.cmp_first_item(self.label()).is_ge() { self.sibling_mut() .and_then(|sibling| sibling.get_longest_common_prefix_mut(next, offset)) } else { None } } pub(crate) fn get_prefix_node( &self, key: &K, ) -> Option<(usize, &Self)> { let (next, common_prefix_len) = 
key.strip_common_prefix_and_len(self.label()); if next.is_empty() { Some((common_prefix_len, self)) } else if common_prefix_len == self.label().len() { self.child().and_then(|child| child.get_prefix_node(next)) } else if common_prefix_len == 0 && key.cmp_first_item(self.label()).is_ge() { self.sibling() .and_then(|sibling| sibling.get_prefix_node(next)) } else { None } } pub(crate) fn get_prefix_node_mut( &mut self, key: &K, ) -> Option<(usize, &mut Self)> { let (next, common_prefix_len) = key.strip_common_prefix_and_len(self.label()); if next.is_empty() { Some((common_prefix_len, self)) } else if common_prefix_len == self.label().len() { self.child_mut() .and_then(|child| child.get_prefix_node_mut(next)) } else if common_prefix_len == 0 && key.cmp_first_item(self.label()).is_ge() { self.sibling_mut() .and_then(|sibling| sibling.get_prefix_node_mut(next)) } else { None } } pub(crate) fn split_by_prefix( &mut self, prefix: &K, level: usize, ) -> Option { let (next, common_prefix_len) = prefix.strip_common_prefix_and_len(self.label()); if common_prefix_len == prefix.as_bytes().len() { let value = self.take_value(); let child = self.take_child(); let node = Node::new(&self.label()[common_prefix_len..], value, child, None); if let Some(sibling) = self.take_sibling() { *self = sibling; } Some(node) } else if common_prefix_len == self.label().len() { self.child_mut() .and_then(|child| child.split_by_prefix(next, level + 1)) .inspect(|_old| { self.try_reclaim_child(); self.try_merge_with_child(level); }) } else if common_prefix_len == 0 && prefix.cmp_first_item(self.label()).is_ge() { self.sibling_mut() .and_then(|sibling| sibling.split_by_prefix(next, level)) .inspect(|_old| { self.try_reclaim_sibling(); }) } else { None } } pub(crate) fn remove(&mut self, key: &K, level: usize) -> Option { let (next, common_prefix_len) = key.strip_common_prefix_and_len(self.label()); if common_prefix_len == self.label().len() { if next.is_empty() { self.take_value().inspect(|_old| { 
self.try_merge_with_child(level); }) } else { self.child_mut() .and_then(|child| child.remove(next, level + 1)) .inspect(|_old| { self.try_reclaim_child(); self.try_merge_with_child(level); }) } } else if common_prefix_len == 0 && key.cmp_first_item(self.label()).is_ge() { self.sibling_mut() .and_then(|sibling| sibling.remove(next, level)) .inspect(|_old| { self.try_reclaim_sibling(); }) } else { None } } pub(crate) fn insert(&mut self, key: &K, value: V) -> Option { if key.cmp_first_item(self.label()).is_lt() { let this = Node { ptr: self.ptr, _value: PhantomData, }; let node = Node::new(key.as_bytes(), Some(value), None, Some(this)); self.ptr = node.ptr; mem::forget(node); return None; } let (next, common_prefix_len) = key.strip_common_prefix_and_len(self.label()); let is_label_matched = common_prefix_len == self.label().len(); if next.as_bytes().is_empty() { if is_label_matched { let old = self.take_value(); self.set_value(value); old } else { self.split_at(common_prefix_len); self.set_value(value); None } } else if is_label_matched { if let Some(child) = self.child_mut() { return child.insert(next, value); } let child = Node::new(next.as_bytes(), Some(value), None, None); self.set_child(child); None } else if common_prefix_len == 0 { if let Some(sibling) = self.sibling_mut() { return sibling.insert(next, value); } let sibling = Node::new(next.as_bytes(), Some(value), None, None); self.set_sibling(sibling); None } else { self.split_at(common_prefix_len); assert_some!(self.child_mut()).insert(next, value); None } } pub(crate) fn flags(&self) -> Flags { Flags::from_bits_truncate(unsafe { *self.ptr }) } fn set_flags(&mut self, other: Flags, value: bool) { let mut flags = self.flags(); flags.set(other, value); unsafe { ptr::write(self.ptr, flags.bits()) }; } fn label_len(&self) -> usize { unsafe { *self.ptr.offset(LABEL_LEN_OFFSET) as usize } } fn value_offset(&self) -> Option { let flags = self.flags(); if flags.contains(Flags::VALUE_ALLOCATED) { let layout = 
Self::initial_layout(self.label_len());
            // `Layout::extend` returns the combined layout and the offset of the
            // appended field; only the offset is needed here.
            let offset = layout.extend(Layout::new::()).expect("unreachable").1;
            Some(offset as isize)
        } else {
            None
        }
    }

    /// Returns the offset of the child slot inside the allocation, or `None`
    /// when `CHILD_ALLOCATED` is not set.
    ///
    /// The child slot follows the value slot when one is allocated.
    fn child_offset(&self) -> Option {
        let flags = self.flags();
        if flags.contains(Flags::CHILD_ALLOCATED) {
            let mut layout = Self::initial_layout(self.label_len());
            if flags.contains(Flags::VALUE_ALLOCATED) {
                layout = layout.extend(Layout::new::()).expect("unreachable").0;
            }
            let offset = layout.extend(Layout::new::()).expect("unreachable").1;
            Some(offset as isize)
        } else {
            None
        }
    }

    /// Returns the offset of the sibling slot inside the allocation, or `None`
    /// when `SIBLING_ALLOCATED` is not set.
    ///
    /// The sibling slot follows the value and child slots that are allocated.
    fn sibling_offset(&self) -> Option {
        let flags = self.flags();
        if flags.contains(Flags::SIBLING_ALLOCATED) {
            let mut layout = Self::initial_layout(self.label_len());
            if flags.contains(Flags::VALUE_ALLOCATED) {
                layout = layout.extend(Layout::new::()).expect("unreachable").0;
            }
            if flags.contains(Flags::CHILD_ALLOCATED) {
                layout = layout.extend(Layout::new::()).expect("unreachable").0;
            }
            let offset = layout.extend(Layout::new::()).expect("unreachable").1;
            Some(offset as isize)
        } else {
            None
        }
    }

    /// Splits this node in two at `position` within its label.
    ///
    /// Afterwards this node carries `label[..position]`, no value, and the
    /// former sibling; its only child carries `label[position..]` together
    /// with the former value and child.
    fn split_at(&mut self, position: usize) {
        debug_assert!(position < self.label_len());
        let value = self.take_value();
        let child = self.take_child();
        let sibling = self.take_sibling();

        let child = Node::new(&self.label()[position..], value, child, None);
        let parent = Node::new(&self.label()[..position], None, Some(child), sibling);
        *self = parent;
    }

    /// Unlinks the sibling when it holds neither a value nor a child, putting
    /// the sibling's own sibling (if any) in its place.
    ///
    /// The `assert_some!` expects a sibling to be present.
    fn try_reclaim_sibling(&mut self) {
        let flags = assert_some!(self.sibling()).flags();
        if flags.intersects(Flags::VALUE_INITIALIZED | Flags::CHILD_INITIALIZED) {
            return;
        }
        if let Some(sibling) = self.take_sibling().and_then(|mut n| n.take_sibling()) {
            self.set_sibling(sibling);
        }
    }

    /// Unlinks the child when it holds neither a value nor a child of its own,
    /// putting the child's sibling (if any) in its place.
    ///
    /// The `assert_some!` expects a child to be present.
    fn try_reclaim_child(&mut self) {
        let flags = assert_some!(self.child()).flags();
        if flags.intersects(Flags::VALUE_INITIALIZED | Flags::CHILD_INITIALIZED) {
            return;
        }
        if let Some(child) = self.take_child().and_then(|mut n| n.take_sibling()) {
            self.set_child(child);
        }
    }

    /// Merges this node with its child when that is possible.
    ///
    /// Nothing happens at `level == 0`, when this node holds a value, when it
    /// has no child, when the child has a sibling, or when the combined label
    /// would be longer than `MAX_LABEL_LEN`.
    pub(crate) fn try_merge_with_child(&mut self, level: usize) {
        if
level == 0 {
            return;
        }

        // A node that holds a value, or that has no child, is left alone.
        if self.flags().contains(Flags::VALUE_INITIALIZED)
            || !self.flags().contains(Flags::CHILD_INITIALIZED)
        {
            return;
        }

        let flags = assert_some!(self.child()).flags();
        // Merge only an only-child, and only if the joined label still fits.
        if !flags.contains(Flags::SIBLING_INITIALIZED)
            && (self.label_len() + assert_some!(self.child()).label_len()) <= MAX_LABEL_LEN
        {
            let mut child = assert_some!(self.take_child());
            let sibling = self.take_sibling();
            let value = child.take_value();
            let grandchild = child.take_child();

            // New label = this label followed by the child's label.
            let mut label = Vec::with_capacity(self.label_len() + child.label_len());
            label.extend(self.label());
            label.extend(child.label());
            let node = Self::new(&label, value, grandchild, sibling);
            *self = node;
        }
    }

    /// Layout of the fixed header plus a label of `label_len` bytes
    /// (`LABEL_OFFSET + label_len` bytes, alignment 1).
    #[inline]
    fn initial_layout(label_len: usize) -> Layout {
        Layout::from_size_align(LABEL_OFFSET as usize + label_len, 1).expect("unreachable")
    }
}

impl Drop for Node {
    /// Drops the value, child and sibling, then frees the node's allocation.
    fn drop(&mut self) {
        // Taking each part out drops it here.
        let _ = self.take_value();
        let _ = self.take_child();
        let _ = self.take_sibling();

        // Rebuild the layout from the flags: one extension per allocated slot,
        // in the order value, child, sibling.
        let mut layout = Self::initial_layout(self.label_len());
        if self.flags().contains(Flags::VALUE_ALLOCATED) {
            layout = layout.extend(Layout::new::()).expect("unreachable").0;
        }
        if self.flags().contains(Flags::CHILD_ALLOCATED) {
            layout = layout.extend(Layout::new::()).expect("unreachable").0;
        }
        if self.flags().contains(Flags::SIBLING_ALLOCATED) {
            layout = layout.extend(Layout::new::()).expect("unreachable").0;
        }

        // NOTE(review): assumes the allocation was made with exactly this padded
        // layout; the allocating code is outside this chunk.
        unsafe { dealloc(self.ptr, layout.pad_to_align()) }
    }
}

impl Clone for Node {
    /// Deep-copies the node: label, value, child and sibling are all cloned
    /// into a newly built node.
    fn clone(&self) -> Self {
        let label = self.label();
        let value = self.value().cloned();
        let child = self.child().cloned();
        let sibling = self.sibling().cloned();
        Node::new(label, value, child, sibling)
    }
}

impl IntoIterator for Node {
    type Item = (usize, Node);
    type IntoIter = IntoIter;

    /// Starts an owning traversal with this node at level 0.
    fn into_iter(self) -> Self::IntoIter {
        IntoIter {
            stack: vec![(0, self)],
        }
    }
}

/// An iterator which traverses the nodes in a tree, in depth first order.
///
/// The first element of an item is the level of the traversing node.
#[derive(Debug)]
pub struct Iter<'a, V: 'a> {
    // Pending `(level, node)` pairs; the last entry is visited next.
    stack: Vec<(usize, &'a Node)>,
}

impl<'a, V: 'a> Iterator for Iter<'a, V> {
    type Item = (usize, &'a Node);

    /// Pops the next node and schedules its sibling and then its child, so
    /// that the child is visited before the sibling.
    fn next(&mut self) -> Option {
        let (depth, current) = self.stack.pop()?;

        // The node at level 0 never contributes its sibling.
        let pending_sibling = if depth == 0 { None } else { current.sibling() };
        self.stack.extend(pending_sibling.map(|sibling| (depth, sibling)));
        self.stack
            .extend(current.child().map(|child| (depth + 1, child)));

        Some((depth, current))
    }
}

/// A mutable iterator which traverses the nodes in a tree, in depth first order.
///
/// The first element of an item is the level of the node being visited.
#[derive(Debug)]
pub struct IterMut<'a, V: 'a> {
    // Pending `(level, node)` pairs; the last entry is visited next.
    stack: Vec<(usize, &'a mut Node)>,
}

/// A view of a single node: its label and, if present, a mutable reference to
/// its value. The child and sibling are not reachable through this type's
/// public methods.
pub struct NodeMut<'a, V: 'a> {
    label: &'a [u8],
    value: Option<&'a mut V>,
    sibling: Option<&'a mut Node>,
    child: Option<&'a mut Node>,
}

impl<'a, V: 'a> NodeMut<'a, V> {
    /// Returns the label of the node.
    pub fn label(&self) -> &'a [u8] {
        self.label
    }

    /// Consumes the view, yielding the mutable reference to the value.
    pub fn into_value_mut(self) -> Option<&'a mut V> {
        self.value
    }
}

impl<'a, V: 'a> Iterator for IterMut<'a, V> {
    type Item = (usize, NodeMut<'a, V>);

    /// Pops the next node, moves its sibling and child references onto the
    /// stack (child on top) and returns what is left of the view.
    fn next(&mut self) -> Option {
        let (depth, raw) = self.stack.pop()?;
        let mut view = raw.as_mut();

        // The node at level 0 keeps its sibling reference; it is not visited.
        let pending_sibling = if depth == 0 {
            None
        } else {
            view.sibling.take()
        };
        self.stack.extend(pending_sibling.map(|sibling| (depth, sibling)));
        self.stack
            .extend(view.child.take().map(|child| (depth + 1, child)));

        Some((depth, view))
    }
}

/// An iterator over the nodes whose labels match successive parts of a key,
/// descending until the key stops matching.
#[derive(Debug)]
pub(crate) struct CommonPrefixesIter<'a, 'b, K: ?Sized, V> {
    // The key being matched against the tree.
    key: &'b K,
    // Pending `(offset, node)` pairs; `offset` is passed to `strip_n_prefix`
    // before the key is compared with the node's label.
    stack: Vec<(usize, &'a Node)>,
}

impl<'a, K, V> Iterator for CommonPrefixesIter<'a, '_, K, V>
where
    K: ?Sized + BorrowedBytes,
{
    type Item = (usize, &'a Node);

    /// Returns the next node whose whole label matches the key at the current
    /// offset, together with the key length matched so far (offset plus label
    /// length). Nodes are returned whether or not they hold a value.
    fn next(&mut self) -> Option {
        while let Some((offset, node)) = self.stack.pop() {
            // NOTE(review): presumably the key with its first `offset` items removed.
            let key = self.key.strip_n_prefix(offset);
            let (_next, common_prefix_len) = key.strip_common_prefix_and_len(node.label());
            // No overlap and the key's first item is not less than the label's:
            // keep the sibling as a candidate.
            if common_prefix_len == 0 && key.cmp_first_item(node.label()).is_ge() {
                if let Some(sibling) = node.sibling() {
                    self.stack.push((offset, sibling));
                }
            }

            // The whole label matched: yield this node and continue below it.
            if common_prefix_len == node.label().len() {
                let prefix_len = offset + common_prefix_len;
                if let Some(child) = node.child() {
                    self.stack.push((prefix_len, child));
                }
                return Some((prefix_len, node));
            }
        }
        None
    }
}

/// An owning iterator which traverses the nodes in a tree, in depth first order.
///
/// The first element of an item is the level of the traversing node.
#[derive(Debug)]
pub struct IntoIter {
    // Pending `(level, node)` pairs; the last entry is visited next.
    stack: Vec<(usize, Node)>,
}

impl Iterator for IntoIter {
    type Item = (usize, Node);

    /// Pops the next node, detaches its sibling and child onto the stack
    /// (child on top) and returns the now stand-alone node.
    ///
    /// There is no level check here, so the sibling of the starting node is
    /// visited as well.
    fn next(&mut self) -> Option {
        if let Some((level, mut node)) = self.stack.pop() {
            if let Some(sibling) = node.take_sibling() {
                self.stack.push((level, sibling));
            }
            if let Some(child) = node.take_child() {
                self.stack.push((level + 1, child));
            }
            Some((level, node))
        } else {
            None
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{PatriciaSet, StringPatriciaMap};
    use std::str;

    // A root node has an empty label and no value, child or sibling.
    #[test]
    fn root_works() {
        let node = Node::<()>::root();
        assert!(node.label().is_empty());
        assert!(node.value().is_none());
        assert!(node.child().is_none());
        assert!(node.sibling().is_none());
    }

    // `Node::new` stores label, value, child and sibling as given.
    #[test]
    fn new_works() {
        let node0 = Node::new("foo".as_ref(), Some(3), None, None);
        assert_eq!(node0.label(), b"foo");
        assert_eq!(node0.value(), Some(&3));
        assert_eq!(node0.child().map(|n| n.label()), None);
        assert_eq!(node0.sibling().map(|n| n.label()), None);

        let node1 = Node::new("bar".as_ref(), None, None,
Some(node0));
        assert_eq!(node1.label(), b"bar");
        assert_eq!(node1.value(), None);
        assert_eq!(node1.child().map(|n| n.label()), None);
        assert_eq!(node1.sibling().map(|n| n.label()), Some(&b"foo"[..]));

        // A label longer than 255 bytes is split across two nodes.
        let node2 = Node::new([b'a'; 256].as_ref(), Some(4), Some(node1), None);
        assert_eq!(node2.label(), [b'a'; 255].as_ref());
        assert_eq!(node2.value(), None);
        assert_eq!(node2.child().map(|n| n.label()), Some(&b"a"[..]));
        assert_eq!(node2.sibling().map(|n| n.label()), None);

        // The value and the original child end up on the second node.
        assert_eq!(node2.child().unwrap().value(), Some(&4));
        assert_eq!(node2.child().unwrap().child().unwrap().label(), b"bar");
    }

    // `iter` visits nodes depth first, children before siblings.
    #[test]
    fn iter_works() {
        let mut set = PatriciaSet::new();
        set.insert("foo");
        set.insert("bar");
        set.insert("baz");

        let root = set.into_node();
        let nodes = root
            .iter()
            .map(|(level, node)| (level, node.label()))
            .collect::>();
        assert_eq!(
            nodes,
            [
                (0, "".as_ref()),
                (1, "ba".as_ref()),
                (2, "r".as_ref()),
                (2, "z".as_ref()),
                (1, "foo".as_ref())
            ]
        );
    }

    // `iter_mut` visits the nodes in the same order as `iter`.
    #[test]
    fn iter_mut_works() {
        let mut set = PatriciaSet::new();
        set.insert("foo");
        set.insert("bar");
        set.insert("baz");

        let mut root = set.into_node();
        let nodes = root
            .iter_mut()
            .map(|(level, node)| (level, node.label()))
            .collect::>();
        assert_eq!(
            nodes,
            [
                (0, "".as_ref()),
                (1, "ba".as_ref()),
                (2, "r".as_ref()),
                (2, "z".as_ref()),
                (1, "foo".as_ref())
            ]
        );
    }

    // A 256-byte label is stored as a 255-byte node plus a 1-byte child that
    // carries the value.
    #[test]
    fn long_label_works() {
        let node = Node::new(&[b'a'; 256][..], Some(10), None, None);
        assert_eq!(node.label(), &[b'a'; 255][..]);
        assert_eq!(node.value(), None);
        assert!(node.child().is_some());

        let child = node.child().unwrap();
        assert_eq!(child.label(), b"a");
        assert_eq!(child.value(), Some(&10));
    }

    // Removing entries reclaims emptied nodes and merges single children.
    #[test]
    fn reclaim_works() {
        let mut set = ["123", "123456", "123abc", "123def"]
            .iter()
            .collect::();
        assert_eq!(
            set_to_labels(&set),
            [(0, ""), (1, "123"), (2, "456"), (2, "abc"), (2, "def")]
        );

        set.remove("123def");
        assert_eq!(
            set_to_labels(&set),
            [(0, ""), (1, "123"),
(2, "456"), (2, "abc")] ); set.remove("123456"); assert_eq!(set_to_labels(&set), [(0, ""), (1, "123"), (2, "abc")]); set.remove("123"); assert_eq!(set_to_labels(&set), [(0, ""), (1, "123abc")]); } #[test] fn get_longest_common_prefix_works() { let set = ["123", "123456", "1234_67", "123abc", "123def"] .iter() .collect::(); let lcp = |key| set.get_longest_common_prefix(key); assert_eq!(lcp(""), None); assert_eq!(lcp("12"), None); assert_eq!(lcp("123"), Some("123".as_bytes())); assert_eq!(lcp("1234"), Some("123".as_bytes())); assert_eq!(lcp("123456"), Some("123456".as_bytes())); assert_eq!(lcp("1234_6"), Some("123".as_bytes())); assert_eq!(lcp("123456789"), Some("123456".as_bytes())); } #[test] fn get_longest_common_prefix_mut_works() { let mut map = [ ("123", 1), ("123456", 2), ("1234_67", 3), ("123abc", 4), ("123def", 5), ] .iter() .cloned() .map(|(k, v)| (String::from(k), v)) .collect::>(); assert_eq!(map.get_longest_common_prefix_mut(""), None); assert_eq!(map.get_longest_common_prefix_mut("12"), None); assert_eq!( map.get_longest_common_prefix_mut("123"), Some(("123", &mut 1)) ); *map.get_longest_common_prefix_mut("123").unwrap().1 = 10; assert_eq!( map.get_longest_common_prefix_mut("1234"), Some(("123", &mut 10)) ); assert_eq!( map.get_longest_common_prefix_mut("123456"), Some(("123456", &mut 2)) ); *map.get_longest_common_prefix_mut("1234567").unwrap().1 = 20; assert_eq!( map.get_longest_common_prefix_mut("1234_6"), Some(("123", &mut 10)) ); assert_eq!( map.get_longest_common_prefix_mut("123456789"), Some(("123456", &mut 20)) ); } fn set_to_labels(set: &PatriciaSet) -> Vec<(usize, &str)> { set.as_node() .iter() .map(|(level, n)| (level, str::from_utf8(n.label()).unwrap())) .collect() } } patricia_tree-0.9.0/src/serialization.rs000064400000000000000000000217711046102023000164670ustar 00000000000000use crate::node::{Flags, Node}; use crate::{BorrowedBytes, GenericPatriciaMap, GenericPatriciaSet}; use serde::de::{Error, Visitor}; use serde::{Deserialize, 
Deserializer, Serialize, Serializer};
use std::borrow::{Borrow, Cow};
use std::marker::PhantomData;

impl Serialize for GenericPatriciaSet {
    /// In order to serialize a [PatriciaSet], make sure you installed the crate
    /// with the feature `serde`.
    ///
    /// For example, in your `Cargo.toml`:
    /// ```toml
    /// [dependencies]
    /// patricia_tree = { version = "*", features = ["serde"] }
    /// ```
    ///
    /// Read more about serialization / deserialization at the [serde] crate.
    fn serialize(&self, serializer: S) -> Result
    where
        S: Serializer,
    {
        self.as_node().serialize(serializer)
    }
}

impl Serialize for GenericPatriciaMap {
    /// In order to serialize a [PatriciaMap], make sure you installed the crate
    /// with the feature `serde`.
    ///
    /// For example, in your `Cargo.toml`:
    /// ```toml
    /// [dependencies]
    /// patricia_tree = { version = "*", features = ["serde"] }
    /// ```
    ///
    /// Read more about serialization / deserialization at the [serde] crate.
    fn serialize(&self, serializer: S) -> Result
    where
        S: Serializer,
    {
        self.as_node().serialize(serializer)
    }
}

impl Serialize for Node {
    /// Serializes the tree rooted at this node as a pair
    /// `(tree bytes, values)`.
    ///
    /// The tree bytes contain one record per node, child before sibling:
    /// the flag byte, the label length (one byte), the node's level as a
    /// big-endian `u16`, and then the label bytes. `values` holds the values
    /// of the nodes in the same order.
    ///
    /// # Errors
    ///
    /// Besides errors reported by the serializer, an error is returned when
    /// the tree is deeper than a `u16` level can express.
    fn serialize(&self, serializer: S) -> Result
    where
        S: Serializer,
    {
        let mut tree_bytes = Vec::new();
        let mut values = Vec::new();
        let mut stack = vec![(0u16, self)];
        while let Some((level, node)) = stack.pop() {
            tree_bytes.push(node.flags().bits());
            tree_bytes.push(node.label().len() as u8);
            tree_bytes.extend_from_slice(&level.to_be_bytes());
            tree_bytes.extend(node.label());
            if let Some(value) = node.value() {
                values.push(value);
            }

            // The child is pushed last so that it is written before the sibling.
            if let Some(sibling) = node.sibling() {
                stack.push((level, sibling));
            }
            if let Some(child) = node.child() {
                // `level + 1` would panic (debug) or wrap around to 0 (release)
                // at depth 65535 and silently emit a corrupt stream.
                let child_level = level.checked_add(1).ok_or_else(|| {
                    <S::Error as serde::ser::Error>::custom("tree is too deep to serialize")
                })?;
                stack.push((child_level, child));
            }
        }

        (Bytes(Cow::Owned(tree_bytes)), values).serialize(serializer)
    }
}

impl<'de, T: crate::Bytes> Deserialize<'de> for GenericPatriciaSet {
    /// In order to deserialize a [PatriciaSet], make sure you installed the crate
    /// with the feature `serde`.
/// /// For example, in your `Cargo.toml`: /// ```toml /// [dependencies] /// patricia_tree = { version = "*", features = ["serde"] } /// ``` /// /// Read more about serialization / deserialization at the [serde] crate. fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { KeyAndNode::::deserialize(deserializer) .map(|x| GenericPatriciaSet::from_node(x.node)) } } impl<'de, K: crate::Bytes, V: Deserialize<'de>> Deserialize<'de> for GenericPatriciaMap { /// In order to serialize a [PatriciaMap], make sure you installed the crate /// with the feature `serde`. /// /// For example, in your `Cargo.toml`: /// ```toml /// [dependencies] /// patricia_tree = { version = "*", features = ["serde"] } /// ``` /// /// Read more about serialization / deserialization at the [serde] crate. fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { KeyAndNode::::deserialize(deserializer).map(|x| GenericPatriciaMap::from_node(x.node)) } } #[derive(Debug)] struct KeyAndNode { node: Node, _key: PhantomData, } impl<'de, K: crate::Bytes, V: Deserialize<'de>> Deserialize<'de> for KeyAndNode { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let (tree_bytes, mut values): (Bytes<'de>, Vec) = Deserialize::deserialize(deserializer)?; values.reverse(); let mut tree_bytes = tree_bytes.0.as_ref(); let mut stack = Vec::new(); while !tree_bytes.is_empty() { if tree_bytes.len() < 4 { return Err(D::Error::custom("unexpected EOS")); } let flags = Flags::from_bits_truncate(tree_bytes[0]); let label_len = usize::from(tree_bytes[1]); let level = (u16::from(tree_bytes[2]) << 8) | u16::from(tree_bytes[3]); tree_bytes = &tree_bytes[4..]; if tree_bytes.len() < label_len { return Err(D::Error::custom("unexpected EOS")); } let mut node = Node::::new_for_decoding(flags, label_len as u8); node.label_mut().copy_from_slice(&tree_bytes[..label_len]); if !K::Borrowed::is_valid_bytes(node.label()) { return Err(D::Error::custom(format!( "malformed label bytes: 
{:?}", node.label() ))); } tree_bytes = &tree_bytes[label_len..]; if flags.contains(Flags::VALUE_INITIALIZED) { let value = values .pop() .ok_or_else(|| D::Error::custom("too few values"))?; node.set_value(value); } stack.push((level, node)); while let Some((level, node)) = stack.pop() { let flags = node.flags(); let has_child_or_sibling = (flags.contains(Flags::CHILD_ALLOCATED) && !flags.contains(Flags::CHILD_INITIALIZED)) || (flags.contains(Flags::SIBLING_ALLOCATED) && !flags.contains(Flags::SIBLING_INITIALIZED)); if has_child_or_sibling { stack.push((level, node)); break; } if let Some((last_level, last_node)) = stack.last_mut() { if level == *last_level { last_node.set_sibling(node); } else if level == *last_level + 1 { last_node.set_child(node); } else { return Err(D::Error::custom("invalid data")); } } else if level == 0 { return Ok(KeyAndNode { node, _key: PhantomData, }); } else { return Err(D::Error::custom("invalid data")); } } } Err(D::Error::custom("invalid data")) } } struct Bytes<'a>(Cow<'a, [u8]>); impl Serialize for Bytes<'_> { fn serialize(&self, serializer: S) -> Result where S: Serializer, { serializer.serialize_bytes(self.0.borrow()) } } impl<'de> Deserialize<'de> for Bytes<'de> { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { deserializer.deserialize_bytes(BytesVisitor) } } struct BytesVisitor; impl<'de> Visitor<'de> for BytesVisitor { type Value = Bytes<'de>; fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { write!(formatter, "a byte string") } fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result where E: Error, { Ok(Bytes(Cow::Borrowed(v))) } fn visit_bytes(self, v: &[u8]) -> Result where E: Error, { Ok(Bytes(Cow::Owned(v.to_owned()))) } fn visit_seq(self, mut seq: A) -> Result where A: serde::de::SeqAccess<'de>, { let mut bytes = Vec::new(); while let Some(byte) = seq.next_element()? 
{ bytes.push(byte); } Ok(Bytes(Cow::Owned(bytes))) } } #[cfg(test)] mod tests { use crate::PatriciaMap; #[test] fn serde_works() { let mut input = vec![ (Vec::from("foo"), 1u32), ("bar".into(), 2), ("baz".into(), 3), ]; input.sort(); let map: PatriciaMap = input.iter().cloned().collect(); let serialized = serde_json::to_vec(&map).unwrap(); let map: PatriciaMap = serde_json::from_slice(serialized.as_slice()).unwrap(); assert_eq!(map.len(), 3); assert_eq!(map.into_iter().collect::>(), input); } #[test] fn large_serde_works() { let mut input = (0..10000u32) .map(|i| (i.to_string().into_bytes(), i)) .collect::>(); input.sort(); let map: PatriciaMap = input.iter().cloned().collect(); let serialized = serde_json::to_vec(&map).unwrap(); let map: PatriciaMap = serde_json::from_slice(serialized.as_slice()).unwrap(); assert_eq!(map.len(), 10000); assert_eq!(map.into_iter().collect::>(), input); } } patricia_tree-0.9.0/src/set.rs000064400000000000000000000350321046102023000144000ustar 00000000000000//! A set based on a patricia tree. use crate::map::{self, GenericPatriciaMap}; #[cfg(any(feature = "serde", test))] use crate::node::Node; use crate::Bytes; use std::fmt; use std::iter::FromIterator; /// Patricia tree based set with [`Vec`] as key. pub type PatriciaSet = GenericPatriciaSet>; /// Patricia tree based set with [`String`] as key. pub type StringPatriciaSet = GenericPatriciaSet; /// Patricia tree based set. pub struct GenericPatriciaSet { map: GenericPatriciaMap, } impl GenericPatriciaSet { /// Makes a new empty [`GenericPatriciaSet`] instance. /// /// # Examples /// /// ``` /// use patricia_tree::PatriciaSet; /// /// let set = PatriciaSet::new(); /// assert!(set.is_empty()); /// ``` pub fn new() -> Self { GenericPatriciaSet { map: GenericPatriciaMap::new(), } } /// Returns the number of elements in this set. 
///
    /// # Examples
    ///
    /// ```
    /// use patricia_tree::PatriciaSet;
    ///
    /// let mut set = PatriciaSet::new();
    /// set.insert("foo");
    /// set.insert("bar");
    /// assert_eq!(set.len(), 2);
    /// ```
    pub fn len(&self) -> usize {
        self.map.len()
    }

    /// Returns `true` if this set contains no elements.
    ///
    /// # Examples
    ///
    /// ```
    /// use patricia_tree::PatriciaSet;
    ///
    /// let mut set = PatriciaSet::new();
    /// assert!(set.is_empty());
    ///
    /// set.insert("foo");
    /// assert!(!set.is_empty());
    ///
    /// set.clear();
    /// assert!(set.is_empty());
    /// ```
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Clears this set, removing all values.
    ///
    /// # Examples
    ///
    /// ```
    /// use patricia_tree::PatriciaSet;
    ///
    /// let mut set = PatriciaSet::new();
    /// set.insert("foo");
    /// set.clear();
    /// assert!(set.is_empty());
    /// ```
    pub fn clear(&mut self) {
        self.map.clear();
    }

    /// Builds a set from a root node by delegating to
    /// `GenericPatriciaMap::from_node`. Only compiled with the `serde` feature.
    #[cfg(feature = "serde")]
    pub(crate) fn from_node(node: Node<()>) -> Self {
        Self {
            map: GenericPatriciaMap::from_node(node),
        }
    }

    /// Borrows the root node of the underlying map. Only compiled for tests
    /// and with the `serde` feature.
    #[cfg(any(test, feature = "serde"))]
    pub(crate) fn as_node(&self) -> &Node<()> {
        self.map.as_node()
    }

    /// Consumes the set and returns the root node of the underlying map.
    /// Only compiled for tests.
    #[cfg(test)]
    pub(crate) fn into_node(self) -> Node<()> {
        self.map.into_node()
    }
}

impl GenericPatriciaSet {
    /// Returns `true` if this set contains the given value.
    ///
    /// # Examples
    ///
    /// ```
    /// use patricia_tree::PatriciaSet;
    ///
    /// let mut set = PatriciaSet::new();
    /// set.insert("foo");
    /// assert!(set.contains("foo"));
    /// assert!(!set.contains("bar"));
    /// ```
    pub fn contains>(&self, value: U) -> bool {
        self.map.get(value).is_some()
    }

    /// Finds the longest common prefix of `value` and the elements in this set.
///
    /// # Examples
    ///
    /// ```
    /// use patricia_tree::PatriciaSet;
    ///
    /// let mut set = PatriciaSet::new();
    ///
    /// set.insert("foo");
    /// set.insert("foobar");
    /// assert_eq!(set.get_longest_common_prefix("fo"), None);
    /// assert_eq!(set.get_longest_common_prefix("foo"), Some("foo".as_bytes()));
    /// assert_eq!(set.get_longest_common_prefix("fooba"), Some("foo".as_bytes()));
    /// assert_eq!(set.get_longest_common_prefix("foobar"), Some("foobar".as_bytes()));
    /// assert_eq!(set.get_longest_common_prefix("foobarbaz"), Some("foobar".as_bytes()));
    /// ```
    pub fn get_longest_common_prefix<'a, U>(&self, value: &'a U) -> Option<&'a T::Borrowed>
    where
        U: ?Sized + AsRef,
    {
        // The map returns a `(key, value)` pair; a set only needs the key part.
        self.map.get_longest_common_prefix(value).map(|x| x.0)
    }

    /// Returns the longest common prefix length of `value` and the elements in this set.
    ///
    /// Unlike `get_longest_common_prefix()`, this method does not check whether this set contains an element that matches the prefix.
    ///
    /// # Examples
    ///
    /// ```
    /// use patricia_tree::PatriciaSet;
    ///
    /// let mut set = PatriciaSet::new();
    /// set.insert("foo");
    /// set.insert("foobar");
    /// assert_eq!(set.longest_common_prefix_len("fo"), 2);
    /// assert_eq!(set.longest_common_prefix_len("foo"), 3);
    /// assert_eq!(set.longest_common_prefix_len("fooba"), 5);
    /// assert_eq!(set.longest_common_prefix_len("foobar"), 6);
    /// assert_eq!(set.longest_common_prefix_len("foobarbaz"), 6);
    /// assert_eq!(set.longest_common_prefix_len("foba"), 2);
    /// ```
    pub fn longest_common_prefix_len(&self, value: &U) -> usize
    where
        U: ?Sized + AsRef,
    {
        self.map.longest_common_prefix_len(value)
    }

    /// Adds a value to this set.
    ///
    /// If the set did not have this value present, `true` is returned.
    /// If the set did have this value present, `false` is returned, and the entry is not updated.
///
    /// # Examples
    ///
    /// ```
    /// use patricia_tree::PatriciaSet;
    ///
    /// let mut set = PatriciaSet::new();
    /// assert!(set.insert("foo"));
    /// assert!(!set.insert("foo"));
    /// assert_eq!(set.len(), 1);
    /// ```
    pub fn insert>(&mut self, value: U) -> bool {
        // The map returns the previous value; `None` means the key is new.
        self.map.insert(value, ()).is_none()
    }

    /// Removes a value from the set. Returns `true` if the value was present in this set.
    ///
    /// # Examples
    ///
    /// ```
    /// use patricia_tree::PatriciaSet;
    ///
    /// let mut set = PatriciaSet::new();
    /// set.insert("foo");
    /// assert_eq!(set.remove("foo"), true);
    /// assert_eq!(set.remove("foo"), false);
    /// ```
    pub fn remove>(&mut self, value: U) -> bool {
        self.map.remove(value).is_some()
    }

    /// Splits the set into two at the given prefix.
    ///
    /// The returned set contains all the entries that are prefixed by `prefix`;
    /// they are removed from `self`.
    ///
    /// # Examples
    ///
    /// ```
    /// use patricia_tree::PatriciaSet;
    ///
    /// let mut a = PatriciaSet::new();
    /// a.insert("rust");
    /// a.insert("ruby");
    /// a.insert("python");
    /// a.insert("erlang");
    ///
    /// let b = a.split_by_prefix("ru");
    ///
    /// assert_eq!(a.iter().collect::>(), [b"erlang", b"python"]);
    /// assert_eq!(b.iter().collect::>(), [b"ruby", b"rust"]);
    /// ```
    pub fn split_by_prefix>(&mut self, prefix: U) -> Self {
        GenericPatriciaSet {
            map: self.map.split_by_prefix(prefix),
        }
    }

    /// Gets an iterator over the contents of this set, in sorted order.
    ///
    /// # Examples
    ///
    /// ```
    /// use patricia_tree::PatriciaSet;
    ///
    /// let mut set = PatriciaSet::new();
    /// set.insert("foo");
    /// set.insert("bar");
    /// set.insert("baz");
    ///
    /// assert_eq!(set.iter().collect::>(), [Vec::from("bar"), "baz".into(), "foo".into()]);
    /// ```
    pub fn iter(&self) -> Iter {
        Iter(self.map.keys())
    }
}

impl GenericPatriciaSet {
    /// Gets an iterator over the contents of this set that have the given prefix, in sorted order.
/// /// # Examples /// /// ``` /// use patricia_tree::PatriciaSet; /// /// let mut set = PatriciaSet::new(); /// set.insert("foo"); /// set.insert("bar"); /// set.insert("baz"); /// /// assert_eq!(set.iter_prefix(b"ba").collect::>(), [Vec::from("bar"), "baz".into()]); /// ``` pub fn iter_prefix<'a, 'b>(&'a self, prefix: &'b T::Borrowed) -> impl 'a + Iterator where 'b: 'a, { self.map.iter_prefix(prefix).map(|(k, _)| k) } } impl fmt::Debug for GenericPatriciaSet { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_set().entries(self.iter()).finish() } } impl Clone for GenericPatriciaSet { fn clone(&self) -> Self { GenericPatriciaSet { map: self.map.clone(), } } } impl Default for GenericPatriciaSet { fn default() -> Self { GenericPatriciaSet::new() } } impl IntoIterator for GenericPatriciaSet { type Item = T; type IntoIter = IntoIter; fn into_iter(self) -> Self::IntoIter { IntoIter(self.map.into_iter()) } } impl> FromIterator for GenericPatriciaSet { fn from_iter(iter: I) -> Self where I: IntoIterator, { let mut set = GenericPatriciaSet::new(); for t in iter { set.insert(t); } set } } impl> Extend for GenericPatriciaSet { fn extend(&mut self, iter: I) where I: IntoIterator, { for t in iter { self.insert(t); } } } /// An Iterator over a `PatriciaSet`'s items. #[derive(Debug)] pub struct Iter<'a, T>(map::Keys<'a, T, ()>); impl Iterator for Iter<'_, T> { type Item = T; fn next(&mut self) -> Option { self.0.next() } } /// An owning iterator over a `PatriciaSet`'s items. 
#[derive(Debug)] pub struct IntoIter(map::IntoIter); impl Iterator for IntoIter { type Item = T; fn next(&mut self) -> Option { self.0.next().map(|(k, _)| k) } } #[cfg(test)] mod tests { use super::*; #[test] fn debug_works() { let set: PatriciaSet = vec!["foo", "bar", "baz"].into_iter().collect(); assert_eq!( format!("{:?}", set), "{[98, 97, 114], [98, 97, 122], [102, 111, 111]}" ); } #[test] fn clear_works() { let mut set = PatriciaSet::new(); set.insert("foo"); assert!(!set.is_empty()); set.clear(); assert!(set.is_empty()); } #[test] fn into_iter_works() { let set: PatriciaSet = vec!["foo", "bar", "baz"].into_iter().collect(); assert_eq!( set.into_iter().collect::>(), [Vec::from("bar"), "baz".into(), "foo".into()] ); } #[test] fn split_by_prefix_works() { let mut set: PatriciaSet = vec!["foo", "bar", "baz"].into_iter().collect(); let splitted_set = set.split_by_prefix(""); assert!(set.is_empty()); assert_eq!( splitted_set.iter().collect::>(), [b"bar", b"baz", b"foo"] ); let mut set: PatriciaSet = vec!["foo", "bar", "baz"].into_iter().collect(); let splitted_set = set.split_by_prefix("f"); assert_eq!(set.iter().collect::>(), [b"bar", b"baz"]); assert_eq!(splitted_set.iter().collect::>(), [b"foo"]); let mut set: PatriciaSet = vec!["foo", "bar", "baz"].into_iter().collect(); let splitted_set = set.split_by_prefix("fo"); assert_eq!(set.iter().collect::>(), [b"bar", b"baz"]); assert_eq!(splitted_set.iter().collect::>(), [b"foo"]); let mut set: PatriciaSet = vec!["foo", "bar", "baz"].into_iter().collect(); let splitted_set = set.split_by_prefix("foo"); assert_eq!(set.iter().collect::>(), [b"bar", b"baz"]); assert_eq!(splitted_set.iter().collect::>(), [b"foo"]); let mut set: PatriciaSet = vec!["foo", "bar", "baz"].into_iter().collect(); let splitted_set = set.split_by_prefix("b"); assert_eq!(set.iter().collect::>(), [b"foo"]); assert_eq!(splitted_set.iter().collect::>(), [b"bar", b"baz"]); let mut set: PatriciaSet = vec!["foo", "bar", "baz"].into_iter().collect(); let 
splitted_set = set.split_by_prefix("ba"); assert_eq!(set.iter().collect::>(), [b"foo"]); assert_eq!(splitted_set.iter().collect::>(), [b"bar", b"baz"]); let mut set: PatriciaSet = vec!["foo", "bar", "baz"].into_iter().collect(); let splitted_set = set.split_by_prefix("bar"); assert_eq!(set.iter().collect::>(), [b"baz", b"foo"]); assert_eq!(splitted_set.iter().collect::>(), [b"bar"]); let mut set: PatriciaSet = vec!["foo", "bar", "baz"].into_iter().collect(); let mut splitted_set = set.split_by_prefix("baz"); assert_eq!(set.iter().collect::>(), [b"bar", b"foo"]); assert_eq!(splitted_set.iter().collect::>(), [b"baz"]); splitted_set.insert("aaa"); assert_eq!(splitted_set.iter().collect::>(), [b"aaa", b"baz"]); let mut set: PatriciaSet = vec!["foo", "bar", "baz"].into_iter().collect(); let splitted_set = set.split_by_prefix("bazz"); assert_eq!(set.iter().collect::>(), [b"bar", b"baz", b"foo"]); assert!(splitted_set.is_empty()); let mut set: PatriciaSet = vec!["foo", "bar", "baz"].into_iter().collect(); let splitted_set = set.split_by_prefix("for"); assert_eq!(set.iter().collect::>(), [b"bar", b"baz", b"foo"]); assert!(splitted_set.is_empty()); let mut set: PatriciaSet = vec!["foo", "bar", "baz"].into_iter().collect(); let splitted_set = set.split_by_prefix("qux"); assert_eq!(set.iter().collect::>(), [b"bar", b"baz", b"foo"]); assert!(splitted_set.is_empty()); } #[test] fn iter_prefix_works() { fn assert_iter_prefix(set: &PatriciaSet, prefix: &str) { let actual = set.iter_prefix(prefix.as_bytes()).collect::>(); let expected = set .iter() .filter(|key| key.starts_with(prefix.as_bytes())) .collect::>(); assert_eq!(actual, expected); } let set: PatriciaSet = vec!["foo", "bar", "baz"].into_iter().collect(); let prefixes = [ "", "a", "b", "ba", "bar", "baz", "bax", "c", "f", "fo", "foo", ]; for prefix in &prefixes { assert_iter_prefix(&set, prefix); } let set: PatriciaSet = vec![ "JavaScript", "Python", "Java", "C++", "Swift", "TypeScript", "Go", "SQL", "Ruby", "R", "PHP", 
"Perl", "Kotlin", "C#", "Rust", "Scheme", "Erlang", "Scala", "Elixir", "Haskell", ] .into_iter() .collect(); let prefixes = [ "", "P", "Py", "J", "Jav", "Java", "JavaS", "Rusti", "E", "El", "H", "S", "Sc", ]; for prefix in &prefixes { assert_iter_prefix(&set, prefix); } } } patricia_tree-0.9.0/src/tree.rs000064400000000000000000000205661046102023000145520ustar 00000000000000use crate::{ node::{self, Node, NodeMut}, BorrowedBytes, }; #[derive(Debug, Clone)] pub struct PatriciaTree { root: Node, len: usize, } impl PatriciaTree { pub fn new() -> Self { PatriciaTree { root: Node::root(), len: 0, } } #[cfg(any(test, feature = "serde"))] pub fn root(&self) -> &Node { &self.root } #[cfg(test)] pub fn into_root(self) -> Node { self.root } pub fn insert(&mut self, key: &K, value: V) -> Option { if let Some(old) = self.root.insert(key, value) { Some(old) } else { self.len += 1; None } } pub fn get(&self, key: &K) -> Option<&V> { self.root.get(key) } pub fn get_mut(&mut self, key: &K) -> Option<&mut V> { self.root.get_mut(key) } pub fn longest_common_prefix_len(&self, key: &K) -> usize { self.root.longest_common_prefix_len(key, 0) } pub fn get_longest_common_prefix<'a, K: ?Sized + BorrowedBytes>( &self, key: &'a K, ) -> Option<(&'a [u8], &V)> { self.root .get_longest_common_prefix(key, 0) .map(|(n, v)| (&key.as_bytes()[..n], v)) } pub fn get_longest_common_prefix_mut<'a, K: ?Sized + BorrowedBytes>( &mut self, key: &'a K, ) -> Option<(&'a [u8], &mut V)> { self.root .get_longest_common_prefix_mut(key, 0) .map(|(n, v)| (&key.as_bytes()[..n], v)) } pub fn iter_prefix(&self, prefix: &K) -> Option<(usize, Nodes)> { if let Some((common_prefix_len, node)) = self.root.get_prefix_node(prefix) { let nodes = Nodes { nodes: node.iter_descendant(), label_lens: Vec::new(), }; Some((prefix.as_bytes().len() - common_prefix_len, nodes)) } else { None } } pub fn iter_prefix_mut( &mut self, prefix: &K, ) -> Option<(usize, NodesMut)> { if let Some((common_prefix_len, node)) = 
self.root.get_prefix_node_mut(prefix) { let nodes = NodesMut { nodes: node.iter_descendant_mut(), label_lens: Vec::new(), }; Some((prefix.as_bytes().len() - common_prefix_len, nodes)) } else { None } } pub(crate) fn common_prefixes<'a, 'b, K>( &'a self, key: &'b K, ) -> node::CommonPrefixesIter<'a, 'b, K, V> where K: ?Sized + BorrowedBytes, { self.root.common_prefixes(key) } pub fn remove(&mut self, key: &K) -> Option { if let Some(old) = self.root.remove(key, 0) { self.len -= 1; Some(old) } else { None } } pub fn split_by_prefix(&mut self, prefix: &K) -> Self { if let Some(splitted_root) = self.root.split_by_prefix(prefix, 0) { let mut splitted_root = Node::new(prefix.as_bytes(), None, Some(splitted_root), None); splitted_root.try_merge_with_child(1); let splitted = Self::from(Node::new(b"", None, Some(splitted_root), None)); self.len -= splitted.len(); splitted } else { Self::new() } } pub fn clear(&mut self) { self.root = Node::root(); self.len = 0; } pub fn len(&self) -> usize { self.len } pub fn nodes(&self) -> Nodes { Nodes { nodes: self.root.iter(), label_lens: Vec::new(), } } pub fn nodes_mut(&mut self) -> NodesMut { NodesMut { nodes: self.root.iter_mut(), label_lens: Vec::new(), } } pub fn into_nodes(self) -> IntoNodes { IntoNodes { nodes: self.root.into_iter(), label_lens: Vec::new(), } } } impl Default for PatriciaTree { fn default() -> Self { Self::new() } } impl From> for PatriciaTree { fn from(f: Node) -> Self { let mut this = PatriciaTree { root: f, len: 0 }; let count = this.nodes().filter(|n| n.1.value().is_some()).count(); this.len = count; this } } #[derive(Debug)] pub struct Nodes<'a, V: 'a> { nodes: node::Iter<'a, V>, label_lens: Vec, } impl<'a, V: 'a> Iterator for Nodes<'a, V> { type Item = (usize, &'a Node); fn next(&mut self) -> Option { if let Some((level, node)) = self.nodes.next() { self.label_lens.resize(level + 1, 0); self.label_lens[level] = node.label().len(); let parent_key_len = self.label_lens.iter().take(level).sum(); 
Some((parent_key_len, node)) } else { None } } } #[derive(Debug)] pub struct NodesMut<'a, V: 'a> { nodes: node::IterMut<'a, V>, label_lens: Vec, } impl<'a, V: 'a> Iterator for NodesMut<'a, V> { type Item = (usize, NodeMut<'a, V>); fn next(&mut self) -> Option { if let Some((level, node)) = self.nodes.next() { self.label_lens.resize(level + 1, 0); self.label_lens[level] = node.label().len(); let parent_key_len = self.label_lens.iter().take(level).sum(); Some((parent_key_len, node)) } else { None } } } #[derive(Debug)] pub struct IntoNodes { nodes: node::IntoIter, label_lens: Vec, } impl Iterator for IntoNodes { type Item = (usize, Node); fn next(&mut self) -> Option { if let Some((level, node)) = self.nodes.next() { self.label_lens.resize(level + 1, 0); self.label_lens[level] = node.label().len(); let parent_key_len = self.label_lens.iter().take(level).sum(); Some((parent_key_len, node)) } else { None } } } #[cfg(test)] mod tests { use super::*; #[test] fn it_works() { let mut tree = PatriciaTree::new(); assert_eq!(tree.insert("".as_bytes(), 1), None); assert_eq!(tree.insert("".as_bytes(), 2), Some(1)); assert_eq!(tree.insert("foo".as_bytes(), 3), None); assert_eq!(tree.insert("foo".as_bytes(), 4), Some(3)); assert_eq!(tree.insert("foobar".as_bytes(), 5), None); assert_eq!(tree.insert("bar".as_bytes(), 6), None); assert_eq!(tree.insert("baz".as_bytes(), 7), None); assert_eq!(tree.insert("bar".as_bytes(), 7), Some(6)); assert_eq!(tree.insert("baz".as_bytes(), 8), Some(7)); assert_eq!(tree.get("".as_bytes()), Some(&2)); assert_eq!(tree.get("foo".as_bytes()), Some(&4)); assert_eq!(tree.get("foobar".as_bytes()), Some(&5)); assert_eq!(tree.get("bar".as_bytes()), Some(&7)); assert_eq!(tree.get("baz".as_bytes()), Some(&8)); assert_eq!(tree.get("qux".as_bytes()), None); let tree2 = tree.clone(); assert_eq!(tree2.get("".as_bytes()), Some(&2)); assert_eq!(tree2.get("foo".as_bytes()), Some(&4)); assert_eq!(tree2.get("foobar".as_bytes()), Some(&5)); 
assert_eq!(tree2.get("bar".as_bytes()), Some(&7)); assert_eq!(tree2.get("baz".as_bytes()), Some(&8)); assert_eq!(tree.remove("".as_bytes()), Some(2)); assert_eq!(tree.remove("foo".as_bytes()), Some(4)); assert_eq!(tree.remove("foobar".as_bytes()), Some(5)); assert_eq!(tree.remove("bar".as_bytes()), Some(7)); assert_eq!(tree.remove("baz".as_bytes()), Some(8)); assert_eq!(tree.remove("qux".as_bytes()), None); assert_eq!(tree.get("".as_bytes()), None); assert_eq!(tree.get("foo".as_bytes()), None); assert_eq!(tree.get("foobar".as_bytes()), None); assert_eq!(tree.get("bar".as_bytes()), None); assert_eq!(tree.get("baz".as_bytes()), None); assert_eq!(tree.get("qux".as_bytes()), None); assert_eq!(tree2.get("".as_bytes()), Some(&2)); assert_eq!(tree2.get("foo".as_bytes()), Some(&4)); assert_eq!(tree2.get("foobar".as_bytes()), Some(&5)); assert_eq!(tree2.get("bar".as_bytes()), Some(&7)); assert_eq!(tree2.get("baz".as_bytes()), Some(&8)); } }