bk-tree-0.5.0/.cargo_vcs_info.json0000644000000001360000000000100123670ustar { "git": { "sha1": "2753aeac4a1c8bc540e85e56a70c47df863565c5" }, "path_in_vcs": "" }bk-tree-0.5.0/.gitignore000075500000000000000000000047141046102023000131600ustar 00000000000000 # Created by https://www.gitignore.io/api/rust,sublimetext,intellij+all ### Intellij+all ### # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 # User-specific stuff: .idea/**/workspace.xml .idea/**/tasks.xml .idea/dictionaries # Sensitive or high-churn files: .idea/**/dataSources/ .idea/**/dataSources.ids .idea/**/dataSources.xml .idea/**/dataSources.local.xml .idea/**/sqlDataSources.xml .idea/**/dynamic.xml .idea/**/uiDesigner.xml # Gradle: .idea/**/gradle.xml .idea/**/libraries # CMake cmake-build-debug/ # Mongo Explorer plugin: .idea/**/mongoSettings.xml ## File-based project format: *.iws ## Plugin-specific files: # IntelliJ /out/ # mpeltonen/sbt-idea plugin .idea_modules/ # JIRA plugin atlassian-ide-plugin.xml # Cursive Clojure plugin .idea/replstate.xml # Ruby plugin and RubyMine /.rakeTasks # Crashlytics plugin (for Android Studio and IntelliJ) com_crashlytics_export_strings.xml crashlytics.properties crashlytics-build.properties fabric.properties ### Intellij+all Patch ### # Ignores the whole idea folder # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 .idea/ ### Rust ### # Generated by Cargo # will have compiled files and executables /target/ # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries # More information here http://doc.crates.io/guide.html#cargotoml-vs-cargolock Cargo.lock # These are backup files generated by rustfmt **/*.rs.bk ### SublimeText ### # cache files for sublime text *.tmlanguage.cache *.tmPreferences.cache *.stTheme.cache # workspace files are user-specific *.sublime-workspace # project files should be checked into the repository, unless a significant # proportion of contributors will probably not be using SublimeText *.sublime-project # sftp configuration file sftp-config.json # Package control specific files Package Control.last-run Package Control.ca-list Package Control.ca-bundle Package Control.system-ca-bundle Package Control.cache/ Package Control.ca-certs/ Package Control.merged-ca-bundle Package Control.user-ca-bundle oscrypto-ca-bundle.crt bh_unicode_properties.cache # Sublime-github package stores a github token in this file # https://packagecontrol.io/packages/sublime-github GitHub.sublime-settings # End of https://www.gitignore.io/api/rust,sublimetext,intellij+allbk-tree-0.5.0/.travis.yml000075500000000000000000000004411046102023000132720ustar 00000000000000language: rust rust: - stable - beta - nightly script: - cargo build --verbose - cargo test --verbose - cargo doc after_success: curl https://raw.githubusercontent.com/kmcallister/travis-doc-upload/master/travis-doc-upload.sh | sh notifications: email: on_success: never bk-tree-0.5.0/Cargo.toml0000644000000023600000000000100103660ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] name = "bk-tree" version = "0.5.0" authors = ["Eugene Bulkin "] description = "A Rust BK-tree implementation" documentation = "http://eugene-bulkin.github.io/rust-docs/rust-bk-tree/bk_tree/index.html" readme = "README.md" keywords = [ "fuzzy", "search", "BK-tree", ] categories = [ "data-structures", "text-processing", ] license = "MIT" repository = "https://github.com/eugene-bulkin/rust-bk-tree" [dependencies.fnv] version = "1.0.7" optional = true [dependencies.serde] version = "1.0" features = ["derive"] optional = true [dependencies.triple_accel] version = "0.3.4" [dev-dependencies.bincode] version = "1.3" [dev-dependencies.rand] version = "0.3" [features] default = ["enable-fnv"] enable-fnv = ["fnv"] serde = ["dep:serde"] bk-tree-0.5.0/Cargo.toml.orig000075500000000000000000000013101046102023000140440ustar 00000000000000[package] name = "bk-tree" version = "0.5.0" authors = ["Eugene Bulkin "] description = "A Rust BK-tree implementation" documentation = "http://eugene-bulkin.github.io/rust-docs/rust-bk-tree/bk_tree/index.html" repository = "https://github.com/eugene-bulkin/rust-bk-tree" readme = "README.md" license = "MIT" keywords = ["fuzzy", "search", "BK-tree"] categories = ["data-structures", "text-processing"] [dependencies] fnv = { version = "1.0.7", optional = true } triple_accel = "0.3.4" serde = { version = "1.0", features = ["derive"], optional = true } [dev-dependencies] rand = "0.3" bincode = "1.3" [features] default = ["enable-fnv"] enable-fnv = ["fnv"] serde = ["dep:serde"] bk-tree-0.5.0/LICENSE000075500000000000000000000020701046102023000121660ustar 00000000000000The MIT License (MIT) Copyright (c) 2016 Eugene Bulkin Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. bk-tree-0.5.0/README.md000075500000000000000000000021771046102023000124500ustar 00000000000000# rust-bk-tree A BK-tree implementation in Rust. [![Build Status](https://travis-ci.org/eugene-bulkin/rust-bk-tree.svg?branch=master)](https://travis-ci.org/eugene-bulkin/rust-bk-tree) [![Crates.io](https://img.shields.io/crates/v/bk-tree.svg)](https://crates.io/crates/bk-tree) [![Clippy Linting Result](http://clippy.bashy.io/github/eugene-bulkin/rust-bk-tree/master/badge.svg)](http://clippy.bashy.io/github/eugene-bulkin/rust-bk-tree/master/log) [Documentation](https://docs.rs/bk-tree/) # Examples Here's some example usages: ```rust use bk_tree::{BKTree, metrics}; // A BK-tree using the Levenshtein distance metric. let mut tree: BKTree<&str> = BKTree::new(metrics::levenshtein); tree.add("foo"); tree.add("bar"); tree.add("baz"); tree.add("bup"); tree.find("bar", 0); // returns vec!["bar"] tree.find("bar", 1); // returns vec!["bar", "baz"] tree.find("bup", 2); // returns vec!["bar", "baz", "bup"] ``` # Benchmarks To run benchmarks, you need to have the nightly version of Rust installed. If you do (and use [multirust](/brson/multirust), for example), then you can run ``` rustup run nightly cargo bench ``` to run benchmarks. bk-tree-0.5.0/benches/lib.rs000075500000000000000000000034361046102023000137130ustar 00000000000000#![feature(test)] extern crate test; extern crate bk_tree; extern crate rand; use bk_tree::BKTree; use bk_tree::metrics::Levenshtein; use test::Bencher; use rand::{thread_rng, Rng}; fn make_words(rng: &mut R, n: i32) -> Vec { let mut words: Vec = Vec::new(); for _ in 1..n { let l = rng.gen_range(4, 12); let s: String = rng.gen_ascii_chars().take(l).collect(); words.push(s); } words } #[bench] fn bench_find_exact(b: &mut Bencher) { let mut tree: BKTree = BKTree::new(Levenshtein); let words = make_words(&mut thread_rng(), 1000); let word = words.last().unwrap().clone(); tree.extend(words); b.iter(|| { tree.find_exact(&word) }); } #[bench] fn bench_find_tol_one(b: &mut Bencher) { let mut tree: BKTree = BKTree::new(Levenshtein); let words = make_words(&mut thread_rng(), 1000); let word = words.last().unwrap().clone(); tree.extend(words); b.iter(|| { tree.find(&word, 1) }); } #[bench] fn bench_find_tol_two(b: &mut Bencher) { let mut tree: BKTree = BKTree::new(Levenshtein); let words = make_words(&mut thread_rng(), 1000); let word = words.last().unwrap().clone(); tree.extend(words); b.iter(|| { tree.find(&word, 2) }); } #[bench] fn bench_add(b: &mut Bencher) { let words = make_words(&mut thread_rng(), 1000); b.iter(move || { let mut tree: BKTree = BKTree::new(Levenshtein); for word in words.clone() { tree.add(word); } }); } #[bench] fn bench_extend(b: &mut Bencher) { let words = make_words(&mut thread_rng(), 1000); b.iter(move || { let mut tree: BKTree = BKTree::new(Levenshtein); tree.extend(words.clone()); }); } bk-tree-0.5.0/scripts/id_rsa.enc000075500000000000000000000062601046102023000146050ustar 00000000000000;63\H{{?6n_A2bkʋ2L,6iʂÿz<^Fkqn/D>Luw_/@nK#Aw;6l}5Tn8ہgեySqBle?+M?7mˣB.R,jLpNf*;8&=J/RC+d]@ lsF(j| c(mJ(2`{T睶U] zj1Ny_Neg&SnD zA#OCsAF 5۴ jPq)[-l.L]0cifV; :X' 6qxNP4c}оH yo"TLwq+ld˳1"u[˖龉td Nu àetčj]]7H_1{y~liypp\b~Yp6./9VسI]B TU|"=l A;Jf޿v/ 45ѝLmX wScqOs$>$?{#^;B(|˨̓~yP-8UE>GY.Mbjb񠤩ڂoo.5)NÜDݠv+XAgpt汉ˑCGZa6՘#g$%hDoHLUp> ݑHӹ~4} ig`yE>%jr\uG3-]pʶSp\wP5[Lg &m *b)4SCCeyq# WL 虮5hi7<{fWE' AQdJ#3ɎG;`mgMxA7ESsؚ(vsO y ׊ٿDVj.Ksقc?Bs1JdeC ^;C d!t640ɶShұP)4 zʽut-Y[._=&

EqL5=V@ʹu14* BE*!`f+$LKZ4Kgno.%39KpT5+(|uזq뙧Y4N |!N%gFxd ێg_^G4p jХƵ6ilNذ9J2> jƭsz%.j혹k#6t&S1/tNEǗ`7׺$xWZ¡'& >DL1/@!o^\nY0U_F/KKfF3bޛ1K fdš,b <A\јf,39⌑K{qV/_qi{tG/%=գ_pO7lhУxCMuŦV3 Q)j mZ*uZ?A%#L( |w&Ioz R&UyZbk-tree-0.5.0/scripts/travis-doc-upload.cfg000075500000000000000000000001371046102023000166700ustar 00000000000000PROJECT_NAME=rust-bk-tree DOCS_REPO=eugene-bulkin/rust-docs.git SSH_KEY_TRAVIS_ID=1339badf3097 bk-tree-0.5.0/src/lib.rs000075500000000000000000000401241046102023000130660ustar 00000000000000#[cfg(feature = "serde")] extern crate serde; pub mod metrics; use std::{ borrow::Borrow, collections::VecDeque, fmt::{Debug, Formatter, Result as FmtResult}, iter::Extend, }; #[cfg(feature = "enable-fnv")] extern crate fnv; #[cfg(feature = "enable-fnv")] use fnv::FnvHashMap; #[cfg(not(feature = "enable-fnv"))] use std::collections::HashMap; /// A trait for a *metric* (distance function). /// /// Implementations should follow the metric axioms: /// /// * **Zero**: `distance(a, b) == 0` if and only if `a == b` /// * **Symmetry**: `distance(a, b) == distance(b, a)` /// * **Triangle inequality**: `distance(a, c) <= distance(a, b) + distance(b, c)` /// /// If any of these rules are broken, then the BK-tree may give unexpected /// results. pub trait Metric { fn distance(&self, a: &K, b: &K) -> u32; fn threshold_distance(&self, a: &K, b: &K, threshold: u32) -> Option; } /// A node within the [BK-tree](https://en.wikipedia.org/wiki/BK-tree). #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] struct BKNode { /// The key determining the node. key: K, /// A hash-map of children, indexed by their distance from this node based /// on the metric being used by the tree. #[cfg(feature = "enable-fnv")] children: FnvHashMap>, #[cfg(not(feature = "enable-fnv"))] children: HashMap>, max_child_distance: Option, } impl BKNode { /// Constructs a new `BKNode`. pub fn new(key: K) -> BKNode { BKNode { key, #[cfg(feature = "enable-fnv")] children: fnv::FnvHashMap::default(), #[cfg(not(feature = "enable-fnv"))] children: HashMap::default(), max_child_distance: None, } } /// Add a child to the node. /// /// Given the distance from this node's key, add the given key as a child /// node. *Warning:* this does not test the invariant that the distance as /// measured by the tree between this node's key and the provided key /// actually matches the distance passed in. /// /// # Examples /// /// ```ignore /// use bk_tree::BKNode; /// /// let mut foo = BKNode::new("foo"); /// foo.add_child(1, "fop"); /// ``` pub fn add_child(&mut self, distance: u32, key: K) { self.children.insert(distance, BKNode::new(key)); self.max_child_distance = self.max_child_distance.max(Some(distance)); } } impl Debug for BKNode where K: Debug, { fn fmt(&self, f: &mut Formatter) -> FmtResult { f.debug_map().entry(&self.key, &self.children).finish() } } /// A representation of a [BK-tree](https://en.wikipedia.org/wiki/BK-tree). #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct BKTree { /// The root node. May be empty if nothing has been put in the tree yet. root: Option>, /// The metric being used to determine the distance between nodes on the /// tree. metric: M, } impl BKTree where M: Metric, { /// Constructs a new `BKTree` using the provided metric. /// /// Note that we make no assumptions about the metric function provided. /// *Ideally* it is actually a /// [valid metric](https://en.wikipedia.org/wiki/Metric_(mathematics)), /// but you may choose to use one that is not technically a valid metric. /// If you do not use a valid metric, however, you may find that the tree /// behaves confusingly for some values. /// /// # Examples /// /// ``` /// use bk_tree::{BKTree, metrics}; /// /// let tree: BKTree<&str> = BKTree::new(metrics::Levenshtein); /// ``` pub fn new(metric: M) -> BKTree { BKTree { root: None, metric } } /// Adds a key to the tree. /// /// If the tree is empty, this simply sets the root to /// `Some(BKNode::new(key))`. Otherwise, we iterate downwards through the /// tree until we see a node that does not have a child with the same /// distance. If we encounter a node that is exactly the same distance from /// the root node, then the new key is the same as that node's key and so we /// do nothing. **Note**: This means that if your metric allows for unequal /// keys to return 0, you will see improper behavior! /// /// # Examples /// /// ``` /// use bk_tree::{BKTree, metrics}; /// /// let mut tree: BKTree<&str> = BKTree::new(metrics::Levenshtein); /// /// tree.add("foo"); /// tree.add("bar"); /// ``` pub fn add(&mut self, key: K) { match self.root { Some(ref mut root) => { let mut cur_node = root; let mut cur_dist = self.metric.distance(&cur_node.key, &key); while cur_node.children.contains_key(&cur_dist) && cur_dist > 0 { // We have to do some moving around here to safely get the // child corresponding to the current distance away without // accidentally trying to mutate the wrong thing. let current = cur_node; let next_node = current.children.get_mut(&cur_dist).unwrap(); cur_node = next_node; cur_dist = self.metric.distance(&cur_node.key, &key); } // If cur_dist == 0, we have landed on a node with the same key. if cur_dist > 0 { cur_node.add_child(cur_dist, key); } } None => { self.root = Some(BKNode::new(key)); } } } /// Searches for a key in the BK-tree given a certain tolerance. /// /// This traverses the tree searching for all keys with distance within /// `tolerance` of of the key provided. The tolerance may be zero, in which /// case this searches for exact matches. The results are returned as an /// iterator of `(distance, key)` pairs. /// /// *Note:* There is no guarantee on the order of elements yielded by the /// iterator. The elements returned may or may not be sorted in terms of /// distance from the provided key. /// /// # Examples /// ``` /// use bk_tree::{BKTree, metrics}; /// /// let mut tree: BKTree<&str> = BKTree::new(metrics::Levenshtein); /// /// tree.add("foo"); /// tree.add("fop"); /// tree.add("bar"); /// /// assert_eq!(tree.find("foo", 0).collect::>(), vec![(0, &"foo")]); /// assert_eq!(tree.find("foo", 1).collect::>(), vec![(0, &"foo"), (1, &"fop")]); /// assert!(tree.find("foz", 0).next().is_none()); /// ``` pub fn find<'a, 'q, Q: ?Sized>(&'a self, key: &'q Q, tolerance: u32) -> Find<'a, 'q, K, Q, M> where K: Borrow, M: Metric, { let candidates = if let Some(root) = &self.root { VecDeque::from(vec![root]) } else { VecDeque::new() }; Find { candidates, tolerance, metric: &self.metric, key, } } /// Searches for an exact match in the tree. /// /// This is equivalent to calling `find` with a tolerance of 0, then picking /// out the first result. /// /// # Examples /// ``` /// use bk_tree::{BKTree, metrics}; /// /// let mut tree: BKTree<&str> = BKTree::new(metrics::Levenshtein); /// /// tree.add("foo"); /// tree.add("fop"); /// tree.add("bar"); /// /// assert_eq!(tree.find_exact("foz"), None); /// assert_eq!(tree.find_exact("foo"), Some(&"foo")); /// ``` pub fn find_exact(&self, key: &Q) -> Option<&K> where K: Borrow, M: Metric, { self.find(key, 0).next().map(|(_, found_key)| found_key) } } impl> Extend for BKTree { /// Adds multiple keys to the tree. /// /// Given an iterator with items of type `K`, this method simply adds every /// item to the tree. /// /// # Examples /// /// ``` /// use bk_tree::{BKTree, metrics}; /// /// let mut tree: BKTree<&str> = BKTree::new(metrics::Levenshtein); /// /// tree.extend(vec!["foo", "bar"]); /// ``` fn extend>(&mut self, keys: I) { for key in keys { self.add(key); } } } impl> Default for BKTree { fn default() -> BKTree { BKTree::new(metrics::Levenshtein) } } /// Iterator for the results of `BKTree::find`. pub struct Find<'a, 'q, K: 'a, Q: 'q + ?Sized, M: 'a> { /// Iterator stack. Because of the inversion of control in play, we must /// implement the traversal using an explicit stack. candidates: VecDeque<&'a BKNode>, tolerance: u32, metric: &'a M, key: &'q Q, } impl<'a, 'q, K, Q: ?Sized, M> Iterator for Find<'a, 'q, K, Q, M> where K: Borrow, M: Metric, { type Item = (u32, &'a K); fn next(&mut self) -> Option<(u32, &'a K)> { while let Some(current) = self.candidates.pop_front() { let BKNode { key, children, max_child_distance, } = current; let distance_cutoff = max_child_distance.unwrap_or(0) + self.tolerance; let cur_dist = self.metric.threshold_distance( self.key, current.key.borrow() as &Q, distance_cutoff, ); if let Some(dist) = cur_dist { // Find the first child node within an appropriate distance let min_dist = dist.saturating_sub(self.tolerance); let max_dist = dist.saturating_add(self.tolerance); for (dist, child_node) in &mut children.iter() { if min_dist <= *dist && *dist <= max_dist { self.candidates.push_back(child_node); } } // If this node is also close enough to the key, yield it if dist <= self.tolerance { return Some((dist, &key)); } } } None } } #[cfg(test)] mod tests { extern crate bincode; use std::fmt::Debug; use {BKNode, BKTree}; fn assert_eq_sorted<'t, T: 't, I>(left: I, right: &[(u32, T)]) where T: Ord + Debug, I: Iterator, { let mut left_mut: Vec<_> = left.collect(); let mut right_mut: Vec<_> = right.iter().map(|&(dist, ref key)| (dist, key)).collect(); left_mut.sort(); right_mut.sort(); assert_eq!(left_mut, right_mut); } #[test] fn node_construct() { let node: BKNode<&str> = BKNode::new("foo"); assert_eq!(node.key, "foo"); assert!(node.children.is_empty()); } #[test] fn tree_construct() { let tree: BKTree<&str> = Default::default(); assert!(tree.root.is_none()); } #[test] fn tree_add() { let mut tree: BKTree<&str> = Default::default(); tree.add("foo"); match tree.root { Some(ref root) => { assert_eq!(root.key, "foo"); } None => { assert!(false); } } tree.add("fop"); tree.add("f\u{e9}\u{e9}"); match tree.root { Some(ref root) => { assert_eq!(root.children.get(&1).unwrap().key, "fop"); assert_eq!(root.children.get(&4).unwrap().key, "f\u{e9}\u{e9}"); } None => { assert!(false); } } } #[test] fn tree_extend() { let mut tree: BKTree<&str> = Default::default(); tree.extend(vec!["foo", "fop"]); match tree.root { Some(ref root) => { assert_eq!(root.key, "foo"); } None => { assert!(false); } } assert_eq!(tree.root.unwrap().children.get(&1).unwrap().key, "fop"); } #[test] fn tree_find() { /* * This example tree is from * https://nullwords.wordpress.com/2013/03/13/the-bk-tree-a-data-structure-for-spell-checking/ */ let mut tree: BKTree<&str> = Default::default(); tree.add("book"); tree.add("books"); tree.add("cake"); tree.add("boo"); tree.add("cape"); tree.add("boon"); tree.add("cook"); tree.add("cart"); assert_eq_sorted(tree.find("caqe", 1), &[(1, "cake"), (1, "cape")]); assert_eq_sorted(tree.find("cape", 1), &[(1, "cake"), (0, "cape")]); assert_eq_sorted( tree.find("book", 1), &[ (0, "book"), (1, "books"), (1, "boo"), (1, "boon"), (1, "cook"), ], ); assert_eq_sorted(tree.find("book", 0), &[(0, "book")]); assert!(tree.find("foobar", 1).next().is_none()); } #[test] fn tree_find_exact() { let mut tree: BKTree<&str> = Default::default(); tree.add("book"); tree.add("books"); tree.add("cake"); tree.add("boo"); tree.add("cape"); tree.add("boon"); tree.add("cook"); tree.add("cart"); assert_eq!(tree.find_exact("caqe"), None); assert_eq!(tree.find_exact("cape"), Some(&"cape")); assert_eq!(tree.find_exact("book"), Some(&"book")); } #[test] fn one_node_tree() { let mut tree: BKTree<&str> = Default::default(); tree.add("book"); tree.add("book"); assert_eq!(tree.root.unwrap().children.len(), 0); } #[cfg(feature = "serde")] #[test] fn test_serialization() { let mut tree: BKTree<&str> = Default::default(); tree.add("book"); tree.add("books"); tree.add("cake"); tree.add("boo"); tree.add("cape"); tree.add("boon"); tree.add("cook"); tree.add("cart"); // Test exact search (zero tolerance) assert_eq_sorted(tree.find("book", 0), &[(0, "book")]); assert_eq_sorted(tree.find("books", 0), &[(0, "books")]); assert_eq_sorted(tree.find("cake", 0), &[(0, "cake")]); assert_eq_sorted(tree.find("boo", 0), &[(0, "boo")]); assert_eq_sorted(tree.find("cape", 0), &[(0, "cape")]); assert_eq_sorted(tree.find("boon", 0), &[(0, "boon")]); assert_eq_sorted(tree.find("cook", 0), &[(0, "cook")]); assert_eq_sorted(tree.find("cart", 0), &[(0, "cart")]); // Test fuzzy search assert_eq_sorted( tree.find("book", 1), &[ (0, "book"), (1, "books"), (1, "boo"), (1, "boon"), (1, "cook"), ], ); // Test for false positives assert_eq!(None, tree.find_exact("This &str hasn't been added")); let encoded_tree: Vec = bincode::serialize(&tree).unwrap(); let decoded_tree: BKTree<&str> = bincode::deserialize(&encoded_tree[..]).unwrap(); // Test exact search (zero tolerance) assert_eq_sorted(decoded_tree.find("book", 0), &[(0, "book")]); assert_eq_sorted(decoded_tree.find("books", 0), &[(0, "books")]); assert_eq_sorted(decoded_tree.find("cake", 0), &[(0, "cake")]); assert_eq_sorted(decoded_tree.find("boo", 0), &[(0, "boo")]); assert_eq_sorted(decoded_tree.find("cape", 0), &[(0, "cape")]); assert_eq_sorted(decoded_tree.find("boon", 0), &[(0, "boon")]); assert_eq_sorted(decoded_tree.find("cook", 0), &[(0, "cook")]); assert_eq_sorted(decoded_tree.find("cart", 0), &[(0, "cart")]); // Test fuzzy search assert_eq_sorted( decoded_tree.find("book", 1), &[ (0, "book"), (1, "books"), (1, "boo"), (1, "boon"), (1, "cook"), ], ); // Test for false positives assert_eq!(None, decoded_tree.find_exact("This &str hasn't been added")); } } bk-tree-0.5.0/src/metrics.rs000075500000000000000000000025311046102023000137660ustar 00000000000000//! This is a collection of string metrics that are suitable for use with a //! BK-tree. #[cfg(feature = "serde")] extern crate serde; use Metric; extern crate triple_accel; use self::triple_accel::{levenshtein, levenshtein::levenshtein_simd_k}; /// This calculates the Levenshtein distance between two strings. /// /// The [distance metric itself][1] is calculated using the [Wagner-Fischer][2] /// dynamic programming algorithm. /// /// # Examples /// /// ``` /// use bk_tree::Metric; /// use bk_tree::metrics::Levenshtein; /// /// assert_eq!(Levenshtein.distance("bar", "baz"), 1); /// assert_eq!(Levenshtein.distance("kitten", "sitting"), 3); /// ``` /// /// [1]: https://en.wikipedia.org/wiki/Levenshtein_distance /// [2]: https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Levenshtein; impl + ?Sized> Metric for Levenshtein { fn distance(&self, a: &K, b: &K) -> u32 { let a_bytes = a.as_ref().as_bytes(); let b_bytes = b.as_ref().as_bytes(); levenshtein(a_bytes, b_bytes) } fn threshold_distance(&self, a: &K, b: &K, threshold: u32) -> Option { let a_bytes = a.as_ref().as_bytes(); let b_bytes = b.as_ref().as_bytes(); levenshtein_simd_k(a_bytes, b_bytes, threshold) } }