binary-merge-0.1.2/.cargo_vcs_info.json0000644000000001120000000000100134070ustar { "git": { "sha1": "8cc484abe1efdaf72a9bb6ff19fe200a6c5afaec" } } binary-merge-0.1.2/.github/workflows/rust.yml000064400000000000000000000007630072674642500173600ustar 00000000000000name: Rust on: push: branches: [ master ] pull_request: branches: [ master ] env: CARGO_TERM_COLOR: always jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: fmt run: cargo fmt --all -- --check - name: clippy run: cargo --locked clippy --all-targets -- -D warnings - name: Build run: cargo build --all-features --locked --verbose - name: Run tests run: cargo test --all-features --locked --verbose binary-merge-0.1.2/.gitignore000064400000000000000000000000250072674642500142220ustar 00000000000000**/target **/*.rs.bk binary-merge-0.1.2/Cargo.toml0000644000000020700000000000100114120ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "binary-merge" version = "0.1.2" authors = ["Rüdiger Klaehn "] description = "Minimum comparison merge of two sorted sequences with random access" homepage = "https://github.com/rklaehn" readme = "README.md" keywords = ["array", "sorting", "merging"] categories = ["algorithms"] license = "MIT OR Apache-2.0" repository = "https://github.com/rklaehn/binary-merge" resolver = "2" [lib] doctest = false [[bench]] name = "merge" harness = false [dependencies] [dev-dependencies.criterion] version = "0.3.5" [dev-dependencies.proptest] version = "1.0.0" binary-merge-0.1.2/Cargo.toml.orig000064400000000000000000000012250072674642500151240ustar 00000000000000[package] name = "binary-merge" version = "0.1.2" edition = "2021" authors = ["Rüdiger Klaehn "] description = "Minimum comparison merge of two sorted sequences with random access" repository = "https://github.com/rklaehn/binary-merge" license = "MIT OR Apache-2.0" keywords = ["array", "sorting", "merging"] categories = ["algorithms"] readme = "README.md" homepage = "https://github.com/rklaehn" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] [dev-dependencies] criterion = "0.3.5" proptest = "1.0.0" [[bench]] name = "merge" harness = false [lib] doctest = false binary-merge-0.1.2/README.md000064400000000000000000000241560072674642500135240ustar 00000000000000
# Minimum comparison merge of two sorted random access sequences

## Problem

At the end of 2014, I was thinking about what would be the most efficient way to merge two sorted sequences.

The answer is obviously trivial if you consider *copying* elements to be roughly as expensive as *comparing* elements. In that case, simply compare the *first remaining element* of each sequence and take the smaller one, until you run out of elements in one of the sequences, then just copy the rest.
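For reference, here is a minimal sketch of this classical tape merge over two sorted slices. It is not part of the crate's public API; the free function, its name and the choice to keep duplicate elements are only illustrative (the real versions are `tape_merge` in `src/lib.rs` and the handrolled baseline in `benches/merge.rs`).

```rust
/// Classical tape merge: one comparison per element emitted, plus a final bulk copy.
fn tape_merge<T: Ord + Clone>(a: &[T], b: &[T]) -> Vec<T> {
    let mut r = Vec::with_capacity(a.len() + b.len());
    let (mut ai, mut bi) = (0, 0);
    while ai < a.len() && bi < b.len() {
        if a[ai] <= b[bi] {
            r.push(a[ai].clone());
            ai += 1;
        } else {
            r.push(b[bi].clone());
            bi += 1;
        }
    }
    // one of the two slices is exhausted, copy the rest of the other one
    r.extend_from_slice(&a[ai..]);
    r.extend_from_slice(&b[bi..]);
    r
}
```

This needs at most m + n - 1 comparisons, and every one of them is paid for, which is perfectly fine as long as comparing is about as cheap as copying.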
But in many cases the assumption that comparing is as expensive as copying is not true. Let's say you have a sequence of `BigInt`, `Rational`, very long `String`, or complex tuples. In that case *comparing* two elements will be *several orders of magnitude* more expensive than *copying* an element.

So let's consider the case where only the number of comparisons matters, and the copying is considered to be essentially free (copying a pointer is just about the cheapest operation you can do; you can literally copy millions of pointers in less than a millisecond on a modern machine). In that case, the seemingly trivial problem of merging two sorted lists turns into a problem that has *10 pages of [TAOCP](https://en.wikipedia.org/wiki/The_Art_of_Computer_Programming)* devoted to it (Volume 3, Pages 197-207, **Minimum-Comparison Merging**).

So I did what you usually do in this situation: [ask on stackexchange](http://programmers.stackexchange.com/questions/267406/algorithm-to-merge-two-sorted-arrays-with-minimum-number-of-comparisons). Given that this should be a pretty common problem, I was expecting an answer like "you obviously have to use the Foo-Bar algorithm described in 1969 by XYZ". But to my surprise, the algorithm that was posted as the answer, despite being called [A simple algorithm for merging two disjoint linearly-ordered sets (F. K. Hwang, S. Lin)](http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.419.8292), is not very simple. It is asymptotically optimal, but too complex to degrade well in the case that the comparison is relatively cheap. Also, it is pretty complex to implement. So I tried to come up with a simpler solution.

## Cases

There are several cases that have to be considered when merging two sorted sequences. Coming up with a good solution for any one of these cases is simple. The challenge is to come up with a solution that works well for **all** of the cases and that gracefully degrades in the worst case.

a) Merging a long sequence with a single element sequence

```js
a = [1,2,3,4,6,7,8,9,10]
b = [5]
```

The best solution in this case is to do a binary search for the insertion point of the single element of `b` in `a`, then just copy

- the part of `a` that is below `b[0]`
- the element `b[0]`
- the part of `a` that is above `b[0]`

Obviously it would be possible to just special-case this solution. But that would be inelegant, and in any case it would not help in case b).

b) Merging a long sequence and a short sequence

```js
a = [1,2,4,5,6,7,9,10]
b = [3,8]
```

In this case you might be tempted to just insert all elements of the smaller list into the larger list, doing binary searches for each insert. But that would be less than optimal. From the insertion position of the first element, we know which elements are definitely smaller than the second element and thus do not have to be compared, so we can restrict the range of the second binary search based on the result of the first.

c) Merging two large sequences which are non-overlapping

```js
a = [1,2,3,4,5]
b = [6,7,8,9,10]
```

This is a case where you can expect huge performance gains, because you just have to copy one list after the other. You could detect this case by comparing the first element of one sequence with the last element of the other sequence and vice versa. But the cost of that comparison would be overhead in other cases, so you can only justify it if you know that this case is very common (which we don't).

d) Merging two completely interleaved sequences

```js
a = [1,3,5,7,9]
b = [2,4,6,8,10]
```

This is the worst case, where it won't be possible to get better results than the linear merge.
Any good algorithm should gracefully handle this case without doing much more than m + n - 1 comparisons. Depending on what you expect as the average case, doing twice as many comparisons might still be OK. But e.g. *O(n log m)* comparisons, like you would get by inserting all *n* elements from the smaller list into the larger list with *m* elements, would *not* be OK.

## Coming up with a good algorithm

Being a functional programmer, I think that the most elegant algorithms are recursive. So let's think about what a recursion step would look like.

### Naming

Let's use `a0` and `a1` for the first (inclusive) and last (exclusive) index of `a` that we're currently interested in. Likewise, `b0` and `b1` for the first (inclusive) and last (exclusive) index of `b` that we're currently interested in.

### The base cases

Before we start thinking about complex things, let's consider the base case(s). Merging a section of a sequence with an *empty* section of another sequence means just copying over all elements of interest from that sequence to the target sequence. So if `a0` is `a1`, just copy everything from `b0` until `b1` to the result, and vice versa.

### The first comparison

It is clear that we have to gain the maximum information from each comparison in order to limit the number of comparisons to the minimum. So it seems intuitively obvious that we have to compare the *middle* element of `a` with the *middle* element of `b`. No matter what the result of the comparison is, we have 50% of all elements in `a` that we never again have to compare with 50% of the elements in `b`. We have gained information for a quarter of all possible comparisons with just a single comparison.

If you had a table of size m \* n with each cell being a possible comparison, executing the comparison at the *center* of the table allows you to eliminate an entire quadrant of the table.

|   | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|
| 1 |   |   | > | > | > |
| 3 |   |   | > | > | > |
| 5 |   |   | > | > | > |
| 7 |   |   |   |   |   |
| 9 |   |   |   |   |   |

```
am = (a0 + a1) / 2
bm = (b0 + b1) / 2
```

Here `a(am) < b(bm)`, so *all* elements `a[i], a0 ≤ i ≤ am` are smaller than *all* elements `b[j], bm ≤ j < b1`.

### The recursion step

Now that we know what we have to do for the first comparison, what do we do with it? What I came up with is the following: we look for the *insertion index* of the center element of `a` in `b`, using a binary search. The first comparison done by the binary search will be exactly as described above. Once we have the result, which we shall call `bm`, we can recurse. We have to merge elements `a0 until am` from `a` with all elements `b0 until bm` from `b`. Then we have to copy the single element `a(am)` to the result, and finally merge elements `am + 1 until a1` from `a` with all elements `bm until b1` from `b`. And that's it.
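To make this concrete, take the interleaved sequences from case d): `a = [1,3,5,7,9]` and `b = [2,4,6,8,10]`. The center element of `a` is `5`, and the binary search finds its insertion index in `b` to be `2` (between `4` and `6`). We therefore recurse on `[1,3]` versus `[2,4]`, copy `5` to the result, and recurse on `[7,9]` versus `[6,8,10]`.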
Here is our code, for the case that `a` and `b` are disjoint ordered sets (the `Ok` branch additionally handles elements that occur in both sequences):

```rust
fn binary_merge(&self, m: &mut M, an: usize, bn: usize) -> bool {
    if an == 0 {
        bn == 0 || self.from_b(m, bn)
    } else if bn == 0 {
        an == 0 || self.from_a(m, an)
    } else {
        // neither a nor b are 0
        let am: usize = an / 2;
        // pick the center element of a and find the corresponding one in b using binary search
        let a = &m.a_slice()[am];
        match m.b_slice()[..bn].binary_search_by(|b| self.cmp(a, b).reverse()) {
            Ok(bm) => {
                // same elements. bm is the index corresponding to am
                // merge everything below am with everything below the found element bm
                self.binary_merge(m, am, bm) &&
                // add the elements a(am) and b(bm)
                self.collision(m) &&
                // merge everything above a(am) with everything above the found element
                self.binary_merge(m, an - am - 1, bn - bm - 1)
            }
            Err(bi) => {
                // not found. bi is the insertion point
                // merge everything below a(am) with everything below the found insertion point bi
                self.binary_merge(m, am, bi) &&
                // add a(am)
                self.from_a(m, 1) &&
                // everything above a(am) with everything above the found insertion point
                self.binary_merge(m, an - am - 1, bn - bi)
            }
        }
    }
}
```

Note that while this method uses recursion, it is not referentially transparent: the result sequence is built in the methods `from_a` and `from_b` using a mutable builder, for efficiency. Of course, you will typically wrap this algorithm in a referentially transparent way.

Also note that the [version in spire](https://github.com/rklaehn/spire/blob/eb70e8e89f669c1cdb731cacf5398c4f9e0dd3f7/core/shared/src/main/scala/spire/math/Merging.scala#L61) is slightly more complex, because it also works for the case where there are common elements in `a` and `b`, and because it is sometimes an advantage to have the insertion point. Here is an [example](https://github.com/rklaehn/spire/blob/eb70e8e89f669c1cdb731cacf5398c4f9e0dd3f7/core/shared/src/main/scala/spire/math/Merging.scala#L101) of how the merging strategy is used to merge two sorted `Array[T]` given an `Order[T]`.

## Behavior for the cases described above

a) Merging a long list with a single element list

It might seem that the algorithm is not symmetric. But at least for the case of merging a large list with a single element list, the algorithm boils down to a binary search in both cases.

b) Merging a long list and a small list

The algorithm will use the information from the comparison of both middle elements to avoid unnecessary comparisons.

c) Merging two long non-overlapping lists

The algorithm will figure out in O(log n) comparisons in the first recursion step that the lists are disjoint, and then just copy them.

d) Merging interleaved lists

This is tricky, but tests with counting comparisons have indicated that the maximum number of comparisons is never much more than `m + n - 1`.
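## Example: union of two sorted slices

The crate exposes the algorithm through the `MergeState` and `MergeOperation` traits. The sketch below computes the union of two sorted, deduplicated slices; it is condensed from the crate's own tests and benchmarks (`src/test.rs`, `benches/merge.rs`), and the `VecMergeState`, `Union` and `union` names as well as the omitted capacity hints are just illustrative choices.

```rust
use binary_merge::{MergeOperation, MergeState};

// Read side of the merge: the remaining parts of a and b, plus the result buffer.
struct VecMergeState<'a, T> {
    a: std::slice::Iter<'a, T>,
    b: std::slice::Iter<'a, T>,
    r: Vec<T>,
}

impl<'a, T> MergeState for VecMergeState<'a, T> {
    type A = T;
    type B = T;
    fn a_slice(&self) -> &[T] {
        self.a.as_slice()
    }
    fn b_slice(&self) -> &[T] {
        self.b.as_slice()
    }
}

// The operation itself is a zero sized struct.
struct Union;

impl<'a, T: Ord + Clone> MergeOperation<VecMergeState<'a, T>> for Union {
    fn from_a(&self, m: &mut VecMergeState<'a, T>, n: usize) -> bool {
        m.r.extend((&mut m.a).cloned().take(n));
        true
    }
    fn from_b(&self, m: &mut VecMergeState<'a, T>, n: usize) -> bool {
        m.r.extend((&mut m.b).cloned().take(n));
        true
    }
    fn collision(&self, m: &mut VecMergeState<'a, T>) -> bool {
        // on equal elements, keep the one from a and skip the one from b
        m.r.extend((&mut m.a).cloned().take(1));
        m.b.next();
        true
    }
    fn cmp(&self, a: &T, b: &T) -> std::cmp::Ordering {
        a.cmp(b)
    }
}

fn union<T: Ord + Clone>(a: &[T], b: &[T]) -> Vec<T> {
    let mut state = VecMergeState {
        a: a.iter(),
        b: b.iter(),
        r: Vec::new(),
    };
    Union.merge(&mut state);
    state.r
}
```

`MergeOperation::merge` switches between the minimum comparison merge and a plain tape merge depending on `MCM_THRESHOLD`, so small inputs do not pay for the extra bookkeeping of the binary searches.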
binary-merge-0.1.2/benches/merge.rs000064400000000000000000000120170072674642500153120ustar 00000000000000use binary_merge::{MergeOperation, MergeState}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use std::{any::type_name, ops::Range, rc::Rc}; struct VecMergeState<'a, T> { a: std::slice::Iter<'a, T>, b: std::slice::Iter<'a, T>, r: Vec, } impl<'a, T> MergeState for VecMergeState<'a, T> { type A = T; type B = T; fn a_slice(&self) -> &[Self::A] { self.a.as_slice() } fn b_slice(&self) -> &[Self::B] { self.b.as_slice() } } struct BinaryMergeUnion; impl<'a, T: Ord + Clone> MergeOperation> for BinaryMergeUnion { fn from_a(&self, m: &mut VecMergeState<'a, T>, n: usize) -> bool { m.r.extend((&mut m.a).cloned().take(n)); true } fn from_b(&self, m: &mut VecMergeState<'a, T>, n: usize) -> bool { m.r.extend((&mut m.b).cloned().take(n)); true } fn collision(&self, m: &mut VecMergeState<'a, T>) -> bool { m.r.extend((&mut m.a).cloned().take(1)); m.b.next(); true } fn cmp(&self, a: &T, b: &T) -> std::cmp::Ordering { a.cmp(b) } } struct TapeMergeUnion; impl<'a, T: Ord + Clone> MergeOperation> for TapeMergeUnion { fn from_a(&self, m: &mut VecMergeState<'a, T>, n: usize) -> bool { m.r.extend((&mut m.a).cloned().take(n)); true } fn from_b(&self, m: &mut VecMergeState<'a, T>, n: usize) -> bool { m.r.extend((&mut m.b).cloned().take(n)); true } fn collision(&self, m: &mut VecMergeState<'a, T>) -> bool { m.r.extend((&mut m.a).cloned().take(1)); m.b.next(); true } fn cmp(&self, a: &T, b: &T) -> std::cmp::Ordering { a.cmp(b) } const MCM_THRESHOLD: usize = usize::MAX; } /// binary merge union fn binary_merge_union(a: &[T], b: &[T]) -> Vec { let mut state = VecMergeState { a: a.iter(), b: b.iter(), r: Vec::with_capacity(a.len().max(b.len())), }; BinaryMergeUnion.merge(&mut state); state.r } /// tape merge union fn tape_merge_union(a: &[T], b: &[T]) -> Vec { let mut state = VecMergeState { a: a.iter(), b: b.iter(), r: Vec::with_capacity(a.len().max(b.len())), }; TapeMergeUnion.merge(&mut state); state.r } /// handrolled version of a tape merge, just as a baseline fn tape_merge_union_handrolled(a: &[T], b: &[T]) -> Vec { let mut res = Vec::with_capacity(a.len().max(b.len())); let mut ai = 0; let mut bi = 0; while ai < a.len() && bi < b.len() { match a[ai].cmp(&b[bi]) { std::cmp::Ordering::Less => { res.push(a[ai].clone()); ai += 1; } std::cmp::Ordering::Equal => { res.push(a[ai].clone()); ai += 1; bi += 1; } std::cmp::Ordering::Greater => { res.push(b[bi].clone()); bi += 1; } } } res.extend_from_slice(&a[ai..]); res.extend_from_slice(&b[bi..]); res } fn union_benches( a: Range, b: Range, f: impl Fn(usize) -> T + Copy, name: &str, c: &mut Criterion, ) { let name = format!("{} T={} a={:?} b={:?}", name, type_name::(), a, b); let ae = a.map(f).collect::>(); let be = b.map(f).collect::>(); c.bench_function(&format!("union {} binary merge", name), |bencher| { bencher.iter(|| binary_merge_union(black_box(&ae), black_box(&be))) }); c.bench_function(&format!("union {} tape merge", name), |bencher| { bencher.iter(|| tape_merge_union(black_box(&ae), black_box(&be))) }); c.bench_function( &format!("union {} tape merge, handrolled", name), |bencher| bencher.iter(|| tape_merge_union_handrolled(black_box(&ae), black_box(&be))), ); } fn full_overlap(c: &mut Criterion) { union_benches(0..1000, 0..1000, |x| x, "full_overlap", c); } fn partial_overlap(c: &mut Criterion) { union_benches(0..1000, 500..1500, |x| x, "partial_overlap", c); } fn no_overlap(c: &mut Criterion) { union_benches(0..1000, 1000..2000, |x| 
x, "no_overlap", c); } fn insertion(c: &mut Criterion) { union_benches(0..2000, 232..233, |x| x, "insert", c); } fn insertion_rev_0(f: impl Fn(usize) -> T + Copy, c: &mut Criterion) { union_benches(1234..1235, 0..2000, f, "insert", c); } fn insertion_rev_usize(c: &mut Criterion) { insertion_rev_0(|x| x, c) } fn insertion_rev_ratio(c: &mut Criterion) { insertion_rev_0( |x| { // make a thing that is cheap to copy, but expensive to compare let mut data = vec![0u8; 4096 - 8]; data.extend_from_slice(&(x as u64).to_be_bytes()); Rc::<[u8]>::from(data) }, c, ) } criterion_group!( benches, full_overlap, partial_overlap, no_overlap, insertion, insertion_rev_usize, insertion_rev_ratio, ); criterion_main!(benches); binary-merge-0.1.2/proptest-regressions/test.txt000064400000000000000000000006250072674642500202010ustar 00000000000000# Seeds for failure cases proptest has generated in the past. It is # automatically read and these particular cases re-run before any # novel cases are generated. # # It is recommended to check this file in to source control so that # everyone who runs the test benefits from these saved cases. cc 92405fa49411d99f857356ff2954dbd8e218a318b0ae1335f0301201670b7307 # shrinks to mut a = [], mut b = [44, 44] binary-merge-0.1.2/src/lib.rs000064400000000000000000000125220072674642500141420ustar 00000000000000#![doc = include_str!("../README.md")] #[cfg(test)] mod test; use core::cmp::Ordering; /// The read part of the merge state that is needed for the binary merge algorithm /// it just needs random access for the remainder of a and b /// /// Very often A and B are the same type, but this is not strictly necessary pub trait MergeState { /// Element type for a type A; /// Element type for b type B; /// The remaining data in a fn a_slice(&self) -> &[Self::A]; /// The remaining data in b fn b_slice(&self) -> &[Self::B]; } /// A binary merge operation /// /// It is often useful to keep the merge operation and the merge state separate. E.g. computing the /// intersection and checking if the intersection exists can be done with the same operation, but /// a different merge state. Likewise in-place operations and operations that produce a new entity /// can use the same merge operation. THerefore, the merge state is an additional parameter.SortedPairIter /// /// The operation itself will often be a zero size struct pub trait MergeOperation { /// Take n elements from a, return true to continue operation fn from_a(&self, m: &mut M, n: usize) -> bool; /// Take n elements from b, return true to continue operation fn from_b(&self, m: &mut M, n: usize) -> bool; /// Take 1 element from both a and b, return true to continue operation fn collision(&self, m: &mut M) -> bool; /// The comparison operation fn cmp(&self, a: &M::A, b: &M::B) -> Ordering; /// merge `an` elements from a and `bn` elements from b into the result /// /// This is a minimum comparison merge that has some overhead, so it is only worth /// it for larger collections and if the comparison operation is expensive. /// /// It does make a big difference e.g. when merging a very large and a very small sequence, /// or two disjoint sequences. 
/// /// returns false if the operation was prematurely aborted fn binary_merge(&self, m: &mut M, an: usize, bn: usize) -> bool { if an == 0 { bn == 0 || self.from_b(m, bn) } else if bn == 0 { an == 0 || self.from_a(m, an) } else { // neither a nor b are 0 let am: usize = an / 2; // pick the center element of a and find the corresponding one in b using binary search let a = &m.a_slice()[am]; match m.b_slice()[..bn].binary_search_by(|b| self.cmp(a, b).reverse()) { Ok(bm) => { // same elements. bm is the index corresponding to am // merge everything below am with everything below the found element bm self.binary_merge(m, am, bm) && // add the elements a(am) and b(bm) self.collision(m) && // merge everything above a(am) with everything above the found element self.binary_merge(m, an - am - 1, bn - bm - 1) } Err(bi) => { // not found. bi is the insertion point // merge everything below a(am) with everything below the found insertion point bi self.binary_merge(m, am, bi) && // add a(am) self.from_a(m, 1) && // everything above a(am) with everything above the found insertion point self.binary_merge(m, an - am - 1, bn - bi) } } } } /// This is the classical tape merge algorithm, useful for when either /// the number of elements is small or the comparison operation is very cheap. fn tape_merge(&self, m: &mut M) -> bool { loop { if let Some(a) = m.a_slice().first() { if let Some(b) = m.b_slice().first() { // something left in both a and b if !match self.cmp(a, b) { Ordering::Less => self.from_a(m, 1), Ordering::Equal => self.collision(m), Ordering::Greater => self.from_b(m, 1), } { return false; } } else { // b is empty, add the rest of a break m.a_slice().is_empty() || self.from_a(m, m.a_slice().len()); } } else { // a is empty, add the rest of b break m.b_slice().is_empty() || self.from_b(m, m.b_slice().len()); } } } fn merge(&self, m: &mut M) -> bool { let an = m.a_slice().len(); let bn = m.b_slice().len(); // only use the minimum comparison merge when it is worth it if an > Self::MCM_THRESHOLD || bn > Self::MCM_THRESHOLD { self.binary_merge(m, an, bn) } else { self.tape_merge(m) } } /// Threshold above which we use the minimum comparison merge /// /// For very small collections, the tape merge has a similar number of comparisons /// and requires less state. /// /// Also, if the comparison operation is very cheap, the minimum comparison merge /// does not have big advantages. /// /// Set this to 0 to always do minimum comparison merge. /// Set it to usize::max to always do tape merge. 
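    /// (For example, the tape merge baseline in `benches/merge.rs` opts out of the minimum comparison
    /// merge entirely by overriding this constant with `usize::MAX`.)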
const MCM_THRESHOLD: usize = 8; } binary-merge-0.1.2/src/test.rs000064400000000000000000000114340072674642500143540ustar 00000000000000use std::collections::BTreeSet; use crate::{MergeOperation, MergeState}; use proptest::prelude::*; struct VecMergeState<'a, T> { a: std::slice::Iter<'a, T>, b: std::slice::Iter<'a, T>, r: Vec, } impl<'a, T> MergeState for VecMergeState<'a, T> { type A = T; type B = T; fn a_slice(&self) -> &[Self::A] { self.a.as_slice() } fn b_slice(&self) -> &[Self::B] { self.b.as_slice() } } struct BoolMergeState<'a, T> { a: std::slice::Iter<'a, T>, b: std::slice::Iter<'a, T>, r: bool, } impl<'a, T> MergeState for BoolMergeState<'a, T> { type A = T; type B = T; fn a_slice(&self) -> &[Self::A] { self.a.as_slice() } fn b_slice(&self) -> &[Self::B] { self.b.as_slice() } } struct Union; impl<'a, T: Ord + Copy> MergeOperation> for Union { fn from_a(&self, m: &mut VecMergeState<'a, T>, n: usize) -> bool { m.r.extend((&mut m.a).cloned().take(n)); true } fn from_b(&self, m: &mut VecMergeState<'a, T>, n: usize) -> bool { m.r.extend((&mut m.b).cloned().take(n)); true } fn collision(&self, m: &mut VecMergeState<'a, T>) -> bool { m.r.extend((&mut m.a).cloned().take(1)); m.b.next(); true } fn cmp(&self, a: &T, b: &T) -> std::cmp::Ordering { a.cmp(b) } } struct Intersection; impl<'a, T: Ord + Copy> MergeOperation> for Intersection { fn from_a(&self, m: &mut VecMergeState<'a, T>, n: usize) -> bool { (&mut m.a).take(n).for_each(drop); true } fn from_b(&self, m: &mut VecMergeState<'a, T>, n: usize) -> bool { (&mut m.b).take(n).for_each(drop); true } fn collision(&self, m: &mut VecMergeState<'a, T>) -> bool { m.r.extend((&mut m.a).cloned().take(1)); m.b.next(); true } fn cmp(&self, a: &T, b: &T) -> std::cmp::Ordering { a.cmp(b) } } struct Intersects; impl<'a, T: Ord + Copy> MergeOperation> for Intersects { fn from_a(&self, m: &mut BoolMergeState<'a, T>, n: usize) -> bool { (&mut m.a).take(n).for_each(drop); true } fn from_b(&self, m: &mut BoolMergeState<'a, T>, n: usize) -> bool { (&mut m.b).take(n).for_each(drop); true } fn collision(&self, m: &mut BoolMergeState<'a, T>) -> bool { m.r = true; false } fn cmp(&self, a: &T, b: &T) -> std::cmp::Ordering { a.cmp(b) } } fn arb_sorted_vec() -> impl Strategy> { any::>().prop_map(|mut v| { v.sort_unstable(); v.dedup(); v }) } #[test] fn smoke() { let a = vec![1, 2, 3, 4]; let b = vec![4, 5, 6, 7]; let mut s = VecMergeState { a: a.iter(), b: b.iter(), r: Default::default(), }; Union.merge(&mut s); assert_eq!(s.r, vec![1, 2, 3, 4, 5, 6, 7]); let mut s = VecMergeState { a: a.iter(), b: b.iter(), r: Default::default(), }; Intersection.merge(&mut s); assert_eq!(s.r, vec![4]); let mut s = BoolMergeState { a: a.iter(), b: b.iter(), r: Default::default(), }; Intersects.merge(&mut s); assert!(s.r); } fn std_set_union(a: Vec, b: Vec) -> Vec { let mut r = BTreeSet::new(); r.extend(a.into_iter()); r.extend(b.into_iter()); r.into_iter().collect() } fn std_set_intersection(a: Vec, b: Vec) -> Vec { let a: BTreeSet = a.into_iter().collect(); let b: BTreeSet = b.into_iter().collect(); a.intersection(&b).cloned().collect() } fn std_set_intersects(a: Vec, b: Vec) -> bool { let a: BTreeSet = a.into_iter().collect(); let b: BTreeSet = b.into_iter().collect(); a.intersection(&b).next().is_some() } proptest! 
{ #[test] fn union( a in arb_sorted_vec(), b in arb_sorted_vec(), ) { let mut s = VecMergeState { a: a.iter(), b: b.iter(), r: Default::default(), }; Union.merge(&mut s); prop_assert_eq!(s.r, std_set_union(a, b)); } #[test] fn intersection( a in arb_sorted_vec(), b in arb_sorted_vec(), ) { let mut s = VecMergeState { a: a.iter(), b: b.iter(), r: Default::default(), }; Intersection.merge(&mut s); prop_assert_eq!(s.r, std_set_intersection(a, b)); } #[test] fn intersects( a in arb_sorted_vec(), b in arb_sorted_vec(), ) { let mut s = BoolMergeState { a: a.iter(), b: b.iter(), r: Default::default(), }; Intersects.merge(&mut s); prop_assert_eq!(s.r, std_set_intersects(a, b)); } }