bitarray-0.10.0/.cargo/config000064400000000000000000000000570072674642500141120ustar 00000000000000[build] rustflags = ["-C", "target-cpu=native"]bitarray-0.10.0/.cargo_vcs_info.json0000644000000001120000000000100127210ustar { "git": { "sha1": "947ea7ba90fb95095cfa79cd94b644d951ba4809" } } bitarray-0.10.0/.github/workflows/lints.yml000064400000000000000000000022210072674642500170150ustar 00000000000000on: push: branches: - main pull_request: name: lints jobs: rustfmt: name: rustfmt runs-on: ubuntu-latest steps: - name: Checkout sources uses: actions/checkout@v2 - name: Install nightly toolchain uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: nightly override: true components: rustfmt - name: Run cargo fmt uses: actions-rs/cargo@v1 with: command: fmt args: --all -- --check clippy: name: clippy runs-on: ubuntu-latest strategy: matrix: features: - - --features unstable-512-bit-simd - --features serde steps: - name: Checkout sources uses: actions/checkout@v2 - name: Install nightly toolchain uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: nightly override: true components: clippy - name: Run cargo clippy uses: actions-rs/cargo@v1 with: command: clippy args: ${{ matrix.features }} -- -D warningsbitarray-0.10.0/.github/workflows/no-std.yml000064400000000000000000000011560072674642500170760ustar 00000000000000# This builds for armv7a-none-eabi to ensure we can build with no-std. # It will fail if there is a dependency on std, as armv7a-none-eabi has no std. on: push: branches: - main pull_request: name: no-std jobs: build: name: no-std runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: toolchain: nightly target: armv7a-none-eabi override: true - uses: actions-rs/cargo@v1 with: command: rustc args: --target=armv7a-none-eabi --manifest-path=ensure_no_std/Cargo.tomlbitarray-0.10.0/.github/workflows/tests.yml000064400000000000000000000012120072674642500170250ustar 00000000000000on: push: branches: - main pull_request: name: tests jobs: tests: name: tests runs-on: ubuntu-latest strategy: matrix: features: - - --features unstable-512-bit-simd - --features serde steps: - name: Checkout sources uses: actions/checkout@v2 - name: Install nightly toolchain uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: nightly override: true - name: Run cargo test uses: actions-rs/cargo@v1 with: command: test args: ${{ matrix.features }}bitarray-0.10.0/.gitignore000064400000000000000000000000220072674642500135310ustar 00000000000000target Cargo.lock bitarray-0.10.0/Cargo.toml0000644000000024670000000000100107360ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" name = "bitarray" version = "0.10.0" authors = ["Geordon Worley "] description = "A const generics driven bit array specialized for hamming distance" documentation = "https://docs.rs/bitarray/" readme = "README.md" keywords = ["const", "generics", "bit", "array", "hamming"] categories = ["no-std"] license = "MIT" repository = "https://github.com/rust-cv/bitarray" [package.metadata.docs.rs] all-features = true [dependencies.cfg-if] version = "1.0.0" [dependencies.serde] version = "1.0.127" features = ["derive"] optional = true default-features = false [dependencies.space] version = "0.18.0" optional = true [dev-dependencies.bincode] version = "1.3.3" default-features = false [dev-dependencies.serde_json] version = "1.0.66" features = ["alloc"] default-features = false [features] unstable-512-bit-simd = [] bitarray-0.10.0/Cargo.toml.orig000064400000000000000000000016770072674642500144510ustar 00000000000000[package] name = "bitarray" version = "0.10.0" edition = "2018" authors = ["Geordon Worley "] license = "MIT" description = "A const generics driven bit array specialized for hamming distance" repository = "https://github.com/rust-cv/bitarray" documentation = "https://docs.rs/bitarray/" keywords = ["const", "generics", "bit", "array", "hamming"] categories = ["no-std"] readme = "README.md" [features] # This does not seem to build on all Windows machines (compiler has an access violation), and only works on Nightly. unstable-512-bit-simd = [] [dependencies] space = { version = "0.18.0", optional = true } serde = { version = "1.0.127", default-features = false, features = ["derive"], optional = true } cfg-if = "1.0.0" [dev-dependencies] bincode = { version = "1.3.3", default-features = false } serde_json = { version = "1.0.66", default-features = false, features = ["alloc"] } [package.metadata.docs.rs] all-features = true bitarray-0.10.0/LICENSE000064400000000000000000000020330072674642500125520ustar 00000000000000Copyright (c) 2019 Rust CV Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. bitarray-0.10.0/README.md000064400000000000000000000027600072674642500130330ustar 00000000000000# bitarray [![Discord][dci]][dcl] [![Crates.io][ci]][cl] ![MIT/Apache][li] [![docs.rs][di]][dl] ![LoC][lo] ![Tests][btl] ![Lints][bll] ![no_std][bnl] [ci]: https://img.shields.io/crates/v/bitarray.svg [cl]: https://crates.io/crates/bitarray/ [li]: https://img.shields.io/crates/l/specs.svg?maxAge=2592000 [di]: https://docs.rs/bitarray/badge.svg [dl]: https://docs.rs/bitarray/ [lo]: https://tokei.rs/b1/github/rust-cv/bitarray?category=code [dci]: https://img.shields.io/discord/550706294311485440.svg?logo=discord&colorB=7289DA [dcl]: https://discord.gg/d32jaam [btl]: https://github.com/rust-cv/bitarray/workflows/tests/badge.svg [bll]: https://github.com/rust-cv/bitarray/workflows/lints/badge.svg [bnl]: https://github.com/rust-cv/bitarray/workflows/no-std/badge.svg A compile time sized array of bits that uses const generics and intrinsics. This library was created to maximize the speed of hamming weight and hamming distance computation. It could be used for many other things. Contributions are welcome! The minimum supported rustc version is 1.51 ## Features Enable the `unstable-512-bit-simd` feature if you would like to use 512-bit SIMD instructions to speed up the library. This feature does not compile on all machines for some currently unknown reason, as an LLVM intrinsics error is reported, even with the same compiler version and host tripple. ## Questions Please visit [![Discord][dci]][dcl] if you have any questions or want to contribute. Also feel free to file an issue on GitHub. bitarray-0.10.0/src/lib.rs000064400000000000000000000236670072674642500134700ustar 00000000000000#![no_std] #![cfg_attr( feature = "unstable-512-bit-simd", feature(link_llvm_intrinsics, repr_simd, simd_ffi, platform_intrinsics) )] #[cfg(feature = "serde")] mod serde_impl; use core::{ fmt, hash::{Hash, Hasher}, ops::{BitAnd, BitOr, BitXor, Deref, DerefMut}, slice, }; #[cfg(feature = "space")] use space::Metric; cfg_if::cfg_if! { if #[cfg(feature = "unstable-512-bit-simd")] { #[repr(simd)] #[derive(Copy, Clone)] struct Tup(u128, u128, u128, u128); #[allow(improper_ctypes, dead_code)] extern "C" { #[link_name = "llvm.ctpop.v4i128"] fn ctpop_512(x: Tup) -> Tup; #[link_name = "llvm.experimental.vector.reduce.add.v4i128"] fn reduce_add_512(x: Tup) -> u128; } extern "platform-intrinsic" { fn simd_xor(x: T, y: T) -> T; } /// Split the bytes up into number of operations of size (512, 64, 8) const fn split_up_simd(n: usize) -> (usize, usize, usize) { let n_512 = n >> 6; let bytes_512 = n_512 << 6; let n_64 = (n - bytes_512) >> 3; let bytes_64 = n_64 << 3; let n_8 = n - bytes_512 - bytes_64; (n_512, n_64, n_8) } } else { /// Split the bytes up into number of operations of size (512, 64, 8) const fn split_up_simd(n: usize) -> (usize, usize) { let n_64 = n >> 3; let bytes_64 = n_64 << 3; let n_8 = n - bytes_64; (n_64, n_8) } } } /// A constant sized array of bits. `B` defines the number of bytes. /// This has an alignment of 64 to maximize the efficiency of SIMD operations. /// It will automatically utilize SIMD at runtime where possible. #[repr(align(64))] #[derive(Copy, Clone)] pub struct BitArray { pub bytes: [u8; B], } impl BitArray { /// Create a new `BitArray`. /// /// ``` /// use bitarray::BitArray; /// let array = BitArray::new([0]); /// assert_eq!(*array.bytes(), [0]); /// ``` pub fn new(bytes: [u8; B]) -> Self { Self { bytes } } /// Create a new `BitArray` with all zeros. /// /// ``` /// use bitarray::BitArray; /// let array = BitArray::zeros(); /// assert_eq!(array, BitArray::new([0])); /// assert_eq!(*array, [0]); /// ``` pub fn zeros() -> Self { Self { bytes: [0; B] } } /// Retrieve the byte array of a `BitArray`. /// /// ``` /// use bitarray::BitArray; /// let array = BitArray::new([1, 2]); /// assert_eq!(*array, [1, 2]); /// ``` pub fn bytes(&self) -> &[u8; B] { &self.bytes } /// Retrieve the mutable byte array of a `BitArray`. /// /// ``` /// use bitarray::BitArray; /// let mut array = BitArray::new([1, 2]); /// array.bytes_mut()[0] = 3; /// assert_eq!(*array, [3, 2]); /// ``` pub fn bytes_mut(&mut self) -> &mut [u8; B] { &mut self.bytes } /// Compute the hamming weight (number of ones) of the `BitArray`. /// /// This is also called `count_ones` in the standard library. /// /// ``` /// use bitarray::BitArray; /// let array = BitArray::new([0xAA; 83]); /// assert_eq!(array.weight(), 4 * 83); /// ``` #[allow(clippy::cast_ptr_alignment)] pub fn weight(&self) -> u32 { cfg_if::cfg_if! { if #[cfg(feature = "unstable-512-bit-simd")] { let (n_512, n_64, n_8) = split_up_simd(self.bytes.len()); let sum_512 = unsafe { slice::from_raw_parts(self.bytes.as_ptr() as *const Tup, n_512) .iter() .copied() .map(|chunk| reduce_add_512(ctpop_512(chunk)) as u32) .sum::() }; let sum_64 = unsafe { slice::from_raw_parts(self.bytes.as_ptr() as *const u64, n_64) .iter() .copied() .map(|chunk| chunk.count_ones()) .sum::() }; let sum_8 = self.bytes[self.bytes.len() - n_8..] .iter() .copied() .map(|b| b.count_ones()) .sum::(); sum_512 + sum_64 + sum_8 } else { let (n_64, n_8) = split_up_simd(self.bytes.len()); let sum_64 = unsafe { slice::from_raw_parts(self.bytes.as_ptr() as *const u64, n_64) .iter() .copied() .map(|chunk| chunk.count_ones()) .sum::() }; let sum_8 = self.bytes[self.bytes.len() - n_8..] .iter() .copied() .map(|b| b.count_ones()) .sum::(); sum_64 + sum_8 } } } /// Compute the hamming distance to another `BitArray`. /// /// ``` /// use bitarray::BitArray; /// /// // All the bits are different. /// let a = BitArray::new([0xAA; 65]); /// let b = BitArray::new([0x55; 65]); /// assert_eq!(a.distance(&b), 8 * 65); /// /// // None of the bits are different. /// let a = BitArray::new([0xAA; 65]); /// let b = BitArray::new([0xAA; 65]); /// assert_eq!(a.distance(&b), 0); /// ``` #[allow(clippy::cast_ptr_alignment)] pub fn distance(&self, other: &Self) -> u32 { cfg_if::cfg_if! { if #[cfg(feature = "unstable-512-bit-simd")] { let simd_len = B >> 6; let simd_bytes = simd_len << 6; let simd_sum = unsafe { slice::from_raw_parts(self.bytes.as_ptr() as *const Tup, simd_len) .iter() .copied() .zip( slice::from_raw_parts(other.bytes.as_ptr() as *const Tup, simd_len) .iter() .copied(), ) .map(|(a, b)| reduce_add_512(ctpop_512(simd_xor(a, b))) as u32) .sum::() }; let remaining_sum = self.bytes[simd_bytes..] .iter() .copied() .zip(other.bytes[simd_bytes..].iter().copied()) .map(|(a, b)| (a ^ b).count_ones()) .sum::(); simd_sum + remaining_sum } else { self.bytes .iter() .copied() .zip(other.bytes.iter().copied()) .map(|(a, b)| (a ^ b).count_ones()) .sum::() } } } } impl BitAnd for BitArray { type Output = Self; fn bitand(mut self, rhs: Self) -> Self::Output { for (d, s) in self.iter_mut().zip(rhs.iter().copied()) { *d &= s; } self } } impl BitOr for BitArray { type Output = Self; fn bitor(mut self, rhs: Self) -> Self::Output { for (d, s) in self.iter_mut().zip(rhs.iter().copied()) { *d |= s; } self } } impl BitXor for BitArray { type Output = Self; fn bitxor(mut self, rhs: Self) -> Self::Output { for (d, s) in self.iter_mut().zip(rhs.iter().copied()) { *d ^= s; } self } } impl PartialEq for BitArray { fn eq(&self, other: &Self) -> bool { self.bytes .iter() .zip(other.bytes.iter()) .all(|(&a, &b)| a == b) } } impl Eq for BitArray {} impl fmt::Debug for BitArray { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.bytes[..].fmt(f) } } impl Hash for BitArray { fn hash(&self, state: &mut H) { self.bytes[..].hash(state) } } /// ``` /// use bitarray::BitArray; /// let mut array = BitArray::new([1, 2]); /// assert_eq!(*array, [1, 2]); /// ``` impl Deref for BitArray { type Target = [u8; B]; fn deref(&self) -> &Self::Target { &self.bytes } } /// ``` /// use bitarray::BitArray; /// let mut array = BitArray::zeros(); /// array[0] = 1; /// array[1] = 2; /// assert_eq!(*array, [1, 2]); /// ``` impl DerefMut for BitArray { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.bytes } } /// Provides [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) as a metric. #[cfg(feature = "space")] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Copy, Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Hamming; #[cfg(feature = "space")] impl Metric> for Hamming { type Unit = u32; fn distance(&self, a: &BitArray, b: &BitArray) -> u32 { a.distance(b) as u32 } } /// Provides [Jaccard distance](https://en.wikipedia.org/wiki/Jaccard_index) as a metric. /// /// The Jaccard similarity is computed and then subtracted from `1.0` /// so that items are ordered by Jaccard distance/dissimilarity. #[cfg(feature = "space")] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Copy, Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Jaccard; #[cfg(feature = "space")] impl Metric> for Jaccard { type Unit = u32; fn distance(&self, &a: &BitArray, &b: &BitArray) -> u32 { let intersection = (a & b).weight(); let union = (a | b).weight(); if union == 0 { 0 } else { (1.0 - intersection as f32 / union as f32).to_bits() } } } bitarray-0.10.0/src/serde_impl.rs000064400000000000000000000041400072674642500150260ustar 00000000000000use crate::BitArray; use core::fmt; use serde::{ de::{Error, Expected, SeqAccess, Visitor}, Deserialize, Deserializer, Serialize, Serializer, }; impl Serialize for BitArray { fn serialize(&self, serializer: S) -> Result where S: Serializer, { serializer.serialize_bytes(&self.bytes) } } impl<'de, const B: usize> Deserialize<'de> for BitArray { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { deserializer.deserialize_bytes(BitArrayVisitor::) } } struct BitArrayVisitor; impl<'de, const B: usize> Visitor<'de> for BitArrayVisitor { type Value = BitArray; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { write!(formatter, "[u8; {}]", B) } fn visit_seq(self, mut seq: A) -> Result where A: SeqAccess<'de>, { let mut arr = [0u8; B]; let mut ix = 0; // Continuously fill the array with more values. while let Some(value) = seq.next_element()? { if ix == B { return Err(Error::custom("bitarray: too many bytes in sequence")); } arr[ix] = value; ix += 1; } if ix != B { Err(Error::invalid_length(ix, &BitArrayExpectedBytes::)) } else { Ok(BitArray::new(arr)) } } fn visit_bytes(self, bytes: &[u8]) -> Result where E: Error, { if bytes.len() != B { Err(Error::invalid_length( bytes.len(), &BitArrayExpectedBytes::, )) } else { let mut bitarray = BitArray::::zeros(); bitarray.bytes.copy_from_slice(bytes); Ok(bitarray) } } } struct BitArrayExpectedBytes; impl Expected for BitArrayExpectedBytes { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { write!(formatter, "{} bytes", B) } } bitarray-0.10.0/tests/serde.rs000064400000000000000000000012260072674642500143620ustar 00000000000000#![cfg(feature = "serde")] use bitarray::BitArray; #[test] fn bincode_serde_json_cycle() { let old_bits = vec![BitArray::new([0, 1, 2, 3, 255])]; let mut bdata = vec![]; bincode::serialize_into(&mut bdata, &old_bits).expect("failed to serialize with bincode"); let middle_bits: Vec> = bincode::deserialize_from(bdata.as_slice()).expect("failed to deserialize with bincode"); let new_bits: Vec> = serde_json::from_str( &serde_json::to_string(&middle_bits).expect("failed to serialize with serde_json"), ) .expect("failed to deserialize with serde_json"); assert_eq!(old_bits, new_bits); }