twox-hash-1.6.3/.cargo_vcs_info.json0000644000000001360000000000100127650ustar { "git": { "sha1": "79168f770e0e870a11c5bb69ec0382547ef04790" }, "path_in_vcs": "" }twox-hash-1.6.3/.github/dependabot.yml000064400000000000000000000002210072674642500157700ustar 00000000000000version: 2 updates: - package-ecosystem: cargo directory: "/" schedule: interval: daily time: "10:00" open-pull-requests-limit: 10 twox-hash-1.6.3/.github/workflows/ci.yml000064400000000000000000000035240072674642500163240ustar 00000000000000on: push name: Continuous integration jobs: library: runs-on: ubuntu-latest strategy: matrix: rust: - stable - beta - nightly - 1.37.0 # MSRV steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: ${{ matrix.rust }} override: true components: rustfmt, clippy - uses: actions-rs/cargo@v1 with: command: build - uses: actions-rs/cargo@v1 with: command: test - uses: actions-rs/cargo@v1 with: command: test args: --all-features - uses: actions-rs/cargo@v1 with: command: fmt args: --all -- --check if: ${{ matrix.rust == 'stable' }} - uses: actions-rs/cargo@v1 with: command: clippy args: --all-features -- -D warnings if: ${{ matrix.rust == 'stable' }} no-std: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable target: thumbv6m-none-eabi override: true - uses: actions-rs/cargo@v1 with: command: build args: --no-default-features --target thumbv6m-none-eabi --lib compatibility-tests: runs-on: ubuntu-latest strategy: matrix: test: - digest_0_8 - digest_0_9 steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable override: true - uses: actions-rs/cargo@v1 with: command: test args: --manifest-path "compatibility-tests/${{ matrix.test }}/Cargo.toml" twox-hash-1.6.3/.gitignore000064400000000000000000000000220072674642500135670ustar 00000000000000target Cargo.lock 
twox-hash-1.6.3/.gitmodules000064400000000000000000000001500072674642500137560ustar 00000000000000[submodule "comparison/xxHash"] path = comparison/xxHash url = https://github.com/Cyan4973/xxHash.git twox-hash-1.6.3/Cargo.lock0000644000000136330000000000100107460ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "crypto-common" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57952ca27b5e3606ff4dd79b0020231aaf9d6aa76dc05fd30137538c50bd3ce8" dependencies = [ "generic-array 0.14.5", "typenum", ] [[package]] name = "digest" version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5" dependencies = [ "generic-array 0.12.4", ] [[package]] name = "digest" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" dependencies = [ "generic-array 0.14.5", ] [[package]] name = "digest" version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506" dependencies = [ "crypto-common", ] [[package]] name = "generic-array" version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffdf9f34f1447443d37393cc6c2b8313aebddcd96906caf34e54c68d8e57d7bd" dependencies = [ "typenum", ] [[package]] name = "generic-array" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803" dependencies = [ "typenum", "version_check", ] [[package]] name = 
"getrandom" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" dependencies = [ "cfg-if", "libc", "wasi", ] [[package]] name = "itoa" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" [[package]] name = "libc" version = "0.2.125" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5916d2ae698f6de9bfb891ad7a8d65c09d232dc58cc4ac433c7da3b2fd84bc2b" [[package]] name = "ppv-lite86" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" [[package]] name = "proc-macro2" version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec757218438d5fda206afc041538b2f6d889286160d649a86a24d37e1235afd1" dependencies = [ "unicode-xid", ] [[package]] name = "quote" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" dependencies = [ "proc-macro2", ] [[package]] name = "rand" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha", "rand_core", ] [[package]] name = "rand_chacha" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", "rand_core", ] [[package]] name = "rand_core" version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" dependencies = [ "getrandom", ] [[package]] name = "ryu" version = "1.0.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f" [[package]] name = "serde" version = "1.0.137" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.137" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c" dependencies = [ "itoa", "ryu", "serde", ] [[package]] name = "static_assertions" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "syn" version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ff7c592601f11445996a06f8ad0c27f094a58857c2f89e97974ab9235b92c52" dependencies = [ "proc-macro2", "quote", "unicode-xid", ] [[package]] name = "twox-hash" version = "1.6.3" dependencies = [ "cfg-if", "digest 0.10.3", "digest 0.8.1", "digest 0.9.0", "rand", "serde", "serde_json", "static_assertions", ] [[package]] name = "typenum" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" [[package]] name = "unicode-xid" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "957e51f3646910546462e67d5f7599b9e4fb8acdd304b087a6494730f9eebf04" [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "wasi" version = "0.10.2+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" twox-hash-1.6.3/Cargo.toml0000644000000030770000000000100107720ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" name = "twox-hash" version = "1.6.3" authors = ["Jake Goulding "] description = "A Rust implementation of the XXHash and XXH3 algorithms" documentation = "https://docs.rs/twox-hash/" readme = "README.md" keywords = [ "hash", "hasher", "xxhash", "xxh3", ] categories = ["algorithms"] license = "MIT" repository = "https://github.com/shepmaster/twox-hash" [dependencies.cfg-if] version = ">= 0.1, < 2" default-features = false [dependencies.digest] version = "0.8" optional = true default-features = false package = "digest" [dependencies.digest_0_10] version = "0.10" optional = true default-features = false package = "digest" [dependencies.digest_0_9] version = "0.9" optional = true default-features = false package = "digest" [dependencies.rand] version = ">= 0.3.10, < 0.9" optional = true [dependencies.serde] version = "1.0" features = ["derive"] optional = true [dependencies.static_assertions] version = "1.0" default-features = false [dev-dependencies.serde_json] version = "1.0" [features] default = ["std"] serialize = ["serde"] std = ["rand"] twox-hash-1.6.3/Cargo.toml.orig000064400000000000000000000020540072674642500144750ustar 
00000000000000[package] name = "twox-hash" version = "1.6.3" authors = ["Jake Goulding "] edition = "2018" description = "A Rust implementation of the XXHash and XXH3 algorithms" readme = "README.md" keywords = ["hash", "hasher", "xxhash", "xxh3"] categories = ["algorithms"] repository = "https://github.com/shepmaster/twox-hash" documentation = "https://docs.rs/twox-hash/" license = "MIT" [dependencies] cfg-if = { version = ">= 0.1, < 2", default-features = false } static_assertions = { version = "1.0", default-features = false } rand = { version = ">= 0.3.10, < 0.9", optional = true } serde = { version = "1.0", features = ["derive"], optional = true} digest = { package = "digest", version = "0.8", default-features = false, optional = true } digest_0_9 = { package = "digest", version = "0.9", default-features = false, optional = true } digest_0_10 = { package = "digest", version = "0.10", default-features = false, optional = true } [dev-dependencies] serde_json = "1.0" [features] default = ["std"] serialize = ["serde"] std = ["rand"] twox-hash-1.6.3/LICENSE.txt000064400000000000000000000020700072674642500134270ustar 00000000000000The MIT License (MIT) Copyright (c) 2015 Jake Goulding Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. twox-hash-1.6.3/README.md000064400000000000000000000076340072674642500130760ustar 00000000000000# TwoX-Hash A Rust implementation of the [XXHash] algorithm. [![Build Status](https://travis-ci.org/shepmaster/twox-hash.svg)](https://travis-ci.org/shepmaster/twox-hash) [![Current Version](https://img.shields.io/crates/v/twox-hash.svg)](https://crates.io/crates/twox-hash) [Documentation](https://docs.rs/twox-hash/) [XXHash]: https://github.com/Cyan4973/xxHash ## Examples ### With a fixed seed ```rust use std::hash::BuildHasherDefault; use std::collections::HashMap; use twox_hash::XxHash64; let mut hash: HashMap<_, _, BuildHasherDefault<XxHash64>> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); ``` ### With a random seed ```rust use std::collections::HashMap; use twox_hash::RandomXxHashBuilder64; let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); ``` ## Benchmarks ### 64-bit | Bytes | SipHasher (MB/s) | XXHash (MB/s) | Ratio | |---------|------------------|---------------|-------| | 1 | 52 | 38 | 73% | | 4 | 210 | 148 | 70% | | 16 | 615 | 615 | 100% | | 32 | 914 | 1391 | 152% | | 128 | 1347 | 3657 | 271% | | 256 | 1414 | 5019 | 355% | | 512 | 1546 | 6168 | 399% | | 1024 | 1565 | 6206 | 397% | | 1048576 | 1592 | 7564 | 475% | | Bytes | [FnvHasher][fnv] (MB/s) | XXHash (MB/s) | Ratio | |---------|-------------------------|---------------|-------| | 1 | 1000 | 38 | 4% | | 4 | 800 | 148 | 19% | | 16 | 761 | 615 | 81% | | 32 | 761 | 1391 | 183% | | 128 | 727 | 3657 | 503% | | 256 | 759 | 5019 | 661% | | 512 | 745 | 6168 | 828% | | 1024 | 741 | 6206 | 838% | | 1048576 | 745 | 7564 
| 1015% | ### 32-bit | Bytes | SipHasher (MB/s) | XXHash32 (MB/s) | Ratio | |---------|------------------|-----------------|-------| | 1 | 52 | 55 | 106% | | 4 | 210 | 210 | 100% | | 16 | 615 | 1230 | 200% | | 32 | 914 | 1882 | 206% | | 128 | 1347 | 3282 | 244% | | 256 | 1414 | 3459 | 245% | | 512 | 1546 | 3792 | 245% | | 1024 | 1565 | 3938 | 252% | | 1048576 | 1592 | 4127 | 259% | | Bytes | [FnvHasher][fnv] (MB/s) | XXHash32 (MB/s) | Ratio | |---------|-------------------------|-----------------|-------| | 1 | 1000 | 55 | 6% | | 4 | 800 | 210 | 26% | | 16 | 761 | 1230 | 162% | | 32 | 761 | 1882 | 247% | | 128 | 727 | 3282 | 451% | | 256 | 759 | 3459 | 456% | | 512 | 745 | 3792 | 509% | | 1024 | 741 | 3938 | 531% | | 1048576 | 745 | 4127 | 554% | [fnv]: https://github.com/servo/rust-fnv ## Contributing 1. Fork it ( https://github.com/shepmaster/twox-hash/fork ) 2. Create your feature branch (`git checkout -b my-new-feature`) 3. Add a failing test. 4. Add code to pass the test. 5. Commit your changes (`git commit -am 'Add some feature'`) 6. Ensure tests pass. 7. Push to the branch (`git push origin my-new-feature`) 8. 
Create a new Pull Request twox-hash-1.6.3/src/bin/hash_file.rs000064400000000000000000000012300072674642500154300ustar 00000000000000use std::env; use std::fs::File; use std::hash::Hasher; use std::io::{BufRead, BufReader}; use twox_hash::XxHash64; fn main() { for arg in env::args().skip(1) { let f = File::open(&arg).unwrap(); let mut f = BufReader::new(f); let mut hasher = XxHash64::with_seed(0); loop { let consumed = { let bytes = f.fill_buf().unwrap(); if bytes.is_empty() { break; } hasher.write(bytes); bytes.len() }; f.consume(consumed); } println!("{:16x} {}", hasher.finish(), arg); } } twox-hash-1.6.3/src/digest_0_10_support.rs000064400000000000000000000034720072674642500165420ustar 00000000000000use core::hash::Hasher; use digest_0_10::{ generic_array::typenum::consts::{U16, U4, U8}, FixedOutput, HashMarker, Output, OutputSizeUser, Update, }; use crate::{xxh3, XxHash32, XxHash64}; // ---------- impl Update for XxHash32 { fn update(&mut self, data: &[u8]) { self.write(data); } } impl OutputSizeUser for XxHash32 { type OutputSize = U4; } impl FixedOutput for XxHash32 { fn finalize_into(self, out: &mut Output) { let tmp: &mut [u8; 4] = out.as_mut(); *tmp = self.finish().to_be_bytes(); } } impl HashMarker for XxHash32 {} // ---------- impl Update for XxHash64 { fn update(&mut self, data: &[u8]) { self.write(data); } } impl OutputSizeUser for XxHash64 { type OutputSize = U8; } impl FixedOutput for XxHash64 { fn finalize_into(self, out: &mut Output) { let tmp: &mut [u8; 8] = out.as_mut(); *tmp = self.finish().to_be_bytes(); } } impl HashMarker for XxHash64 {} // ---------- impl Update for xxh3::Hash64 { fn update(&mut self, data: &[u8]) { self.write(data); } } impl OutputSizeUser for xxh3::Hash64 { type OutputSize = U8; } impl FixedOutput for xxh3::Hash64 { fn finalize_into(self, out: &mut Output) { let tmp: &mut [u8; 8] = out.as_mut(); *tmp = self.finish().to_be_bytes(); } } impl HashMarker for xxh3::Hash64 {} // ---------- impl Update for xxh3::Hash128 { fn 
update(&mut self, data: &[u8]) { self.write(data); } } impl OutputSizeUser for xxh3::Hash128 { type OutputSize = U16; } impl FixedOutput for xxh3::Hash128 { fn finalize_into(self, out: &mut Output) { let tmp: &mut [u8; 16] = out.as_mut(); *tmp = xxh3::HasherExt::finish_ext(&self).to_be_bytes(); } } impl HashMarker for xxh3::Hash128 {} twox-hash-1.6.3/src/digest_0_9_support.rs000064400000000000000000000070170072674642500164710ustar 00000000000000use core::hash::Hasher; use digest_0_9::{ generic_array::{ typenum::consts::{U16, U4, U8}, GenericArray, }, Digest, }; use crate::{xxh3, XxHash32, XxHash64}; impl Digest for XxHash32 { type OutputSize = U4; fn new() -> Self { Self::default() } fn update(&mut self, data: impl AsRef<[u8]>) { self.write(data.as_ref()); } fn chain(mut self, data: impl AsRef<[u8]>) -> Self where Self: Sized, { self.update(data); self } fn finalize(self) -> GenericArray { self.finish().to_be_bytes().into() } fn finalize_reset(&mut self) -> GenericArray { let result = self.finalize(); self.reset(); result } fn reset(&mut self) { *self = Self::default(); } fn output_size() -> usize { 4 } fn digest(data: &[u8]) -> GenericArray { Self::new().chain(data).finalize() } } impl Digest for XxHash64 { type OutputSize = U8; fn new() -> Self { Self::default() } fn update(&mut self, data: impl AsRef<[u8]>) { self.write(data.as_ref()); } fn chain(mut self, data: impl AsRef<[u8]>) -> Self where Self: Sized, { self.update(data); self } fn finalize(self) -> GenericArray { self.finish().to_be_bytes().into() } fn finalize_reset(&mut self) -> GenericArray { let result = self.finalize(); self.reset(); result } fn reset(&mut self) { *self = Self::default(); } fn output_size() -> usize { 8 } fn digest(data: &[u8]) -> GenericArray { Self::new().chain(data).finalize() } } impl Digest for xxh3::Hash64 { type OutputSize = U8; fn new() -> Self { Self::default() } fn update(&mut self, data: impl AsRef<[u8]>) { self.write(data.as_ref()); } fn chain(mut self, data: impl 
AsRef<[u8]>) -> Self where Self: Sized, { self.update(data); self } fn finalize(self) -> GenericArray { self.finish().to_be_bytes().into() } fn finalize_reset(&mut self) -> GenericArray { let result = self.clone().finalize(); self.reset(); result } fn reset(&mut self) { *self = Self::default(); } fn output_size() -> usize { 8 } fn digest(data: &[u8]) -> GenericArray { Self::new().chain(data).finalize() } } impl Digest for xxh3::Hash128 { type OutputSize = U16; fn new() -> Self { Self::default() } fn update(&mut self, data: impl AsRef<[u8]>) { self.write(data.as_ref()); } fn chain(mut self, data: impl AsRef<[u8]>) -> Self where Self: Sized, { self.update(data); self } fn finalize(self) -> GenericArray { xxh3::HasherExt::finish_ext(&self).to_be_bytes().into() } fn finalize_reset(&mut self) -> GenericArray { let result = self.clone().finalize(); self.reset(); result } fn reset(&mut self) { *self = Self::default(); } fn output_size() -> usize { 8 } fn digest(data: &[u8]) -> GenericArray { Self::new().chain(data).finalize() } } twox-hash-1.6.3/src/digest_support.rs000064400000000000000000000067530072674642500160300ustar 00000000000000use core::hash::Hasher; use digest::{ generic_array::{ typenum::consts::{U16, U4, U8}, GenericArray, }, Digest, }; use crate::{xxh3, XxHash32, XxHash64}; impl Digest for XxHash32 { type OutputSize = U4; fn new() -> Self { Self::default() } fn input>(&mut self, data: B) { self.write(data.as_ref()); } fn chain>(mut self, data: B) -> Self where Self: Sized, { self.input(data); self } fn result(self) -> GenericArray { self.finish().to_be_bytes().into() } fn result_reset(&mut self) -> GenericArray { let result = self.result(); self.reset(); result } fn reset(&mut self) { *self = Self::default(); } fn output_size() -> usize { 4 } fn digest(data: &[u8]) -> GenericArray { Self::new().chain(data).result() } } impl Digest for XxHash64 { type OutputSize = U8; fn new() -> Self { Self::default() } fn input>(&mut self, data: B) { 
self.write(data.as_ref()); } fn chain>(mut self, data: B) -> Self where Self: Sized, { self.input(data); self } fn result(self) -> GenericArray { self.finish().to_be_bytes().into() } fn result_reset(&mut self) -> GenericArray { let result = self.result(); self.reset(); result } fn reset(&mut self) { *self = Self::default(); } fn output_size() -> usize { 8 } fn digest(data: &[u8]) -> GenericArray { Self::new().chain(data).result() } } impl Digest for xxh3::Hash64 { type OutputSize = U8; fn new() -> Self { Self::default() } fn input>(&mut self, data: B) { self.write(data.as_ref()); } fn chain>(mut self, data: B) -> Self where Self: Sized, { self.input(data); self } fn result(self) -> GenericArray { self.finish().to_be_bytes().into() } fn result_reset(&mut self) -> GenericArray { let result = self.clone().result(); self.reset(); result } fn reset(&mut self) { *self = Self::default(); } fn output_size() -> usize { 8 } fn digest(data: &[u8]) -> GenericArray { Self::new().chain(data).result() } } impl Digest for xxh3::Hash128 { type OutputSize = U16; fn new() -> Self { Self::default() } fn input>(&mut self, data: B) { self.write(data.as_ref()); } fn chain>(mut self, data: B) -> Self where Self: Sized, { self.input(data); self } fn result(self) -> GenericArray { xxh3::HasherExt::finish_ext(&self).to_be_bytes().into() } fn result_reset(&mut self) -> GenericArray { let result = self.clone().result(); self.reset(); result } fn reset(&mut self) { *self = Self::default(); } fn output_size() -> usize { 8 } fn digest(data: &[u8]) -> GenericArray { Self::new().chain(data).result() } } twox-hash-1.6.3/src/lib.rs000064400000000000000000000061720072674642500135160ustar 00000000000000//! A Rust implementation of the [XXHash] algorithm. //! //! [XXHash]: https://github.com/Cyan4973/xxHash //! //! ### With a fixed seed //! //! ```rust //! use std::hash::BuildHasherDefault; //! use std::collections::HashMap; //! use twox_hash::XxHash64; //! //! 
let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); //! hash.insert(42, "the answer"); //! assert_eq!(hash.get(&42), Some(&"the answer")); //! ``` //! //! ### With a random seed //! //! ```rust //! use std::collections::HashMap; //! use twox_hash::RandomXxHashBuilder64; //! //! let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); //! hash.insert(42, "the answer"); //! assert_eq!(hash.get(&42), Some(&"the answer")); //! ``` #![no_std] extern crate alloc; #[cfg(test)] extern crate std; use core::{marker::PhantomData, mem}; mod sixty_four; mod thirty_two; pub mod xxh3; #[cfg(feature = "std")] mod std_support; #[cfg(feature = "std")] pub use std_support::sixty_four::RandomXxHashBuilder64; #[cfg(feature = "std")] pub use std_support::thirty_two::RandomXxHashBuilder32; #[cfg(feature = "std")] pub use std_support::xxh3::{ RandomHashBuilder128 as RandomXxh3HashBuilder128, RandomHashBuilder64 as RandomXxh3HashBuilder64, }; #[cfg(feature = "digest")] mod digest_support; #[cfg(feature = "digest_0_9")] mod digest_0_9_support; #[cfg(feature = "digest_0_10")] mod digest_0_10_support; pub use crate::sixty_four::XxHash64; pub use crate::thirty_two::XxHash32; pub use crate::xxh3::{Hash128 as Xxh3Hash128, Hash64 as Xxh3Hash64}; /// A backwards compatibility type alias. Consider directly using /// `XxHash64` instead. pub type XxHash = XxHash64; #[cfg(feature = "std")] /// A backwards compatibility type alias. Consider directly using /// `RandomXxHashBuilder64` instead. pub type RandomXxHashBuilder = RandomXxHashBuilder64; /// An unaligned buffer with iteration support for `UnalignedItem`. struct UnalignedBuffer<'a, T> { buf: &'a [u8], phantom: PhantomData, } /// Types implementing this trait must be transmutable from a `*const /// u8` to `*const Self` at any possible alignment. /// /// The intent is to use this with only primitive integer types (and /// tightly-packed arrays of those integers). 
#[allow(clippy::missing_safety_doc)] unsafe trait UnalignedItem {} unsafe impl UnalignedItem for [u64; 4] {} unsafe impl UnalignedItem for [u32; 4] {} unsafe impl UnalignedItem for u64 {} unsafe impl UnalignedItem for u32 {} impl<'a, T: UnalignedItem> UnalignedBuffer<'a, T> { #[inline] fn new(buf: &'a [u8]) -> Self { Self { buf, phantom: PhantomData, } } #[inline] fn remaining(&self) -> &[u8] { self.buf } } impl<'a, T: UnalignedItem> Iterator for UnalignedBuffer<'a, T> { type Item = T; fn next(&mut self) -> Option { let size = mem::size_of::(); self.buf.get(size..).map(|remaining| { // `self.buf` has at least `size` bytes that can be read as `T`. let result = unsafe { (self.buf.as_ptr() as *const T).read_unaligned() }; self.buf = remaining; result }) } } twox-hash-1.6.3/src/sixty_four.rs000064400000000000000000000304560072674642500151650ustar 00000000000000use crate::UnalignedBuffer; use core::{cmp, hash::Hasher}; #[cfg(feature = "serialize")] use serde::{Deserialize, Serialize}; const CHUNK_SIZE: usize = 32; pub const PRIME_1: u64 = 11_400_714_785_074_694_791; pub const PRIME_2: u64 = 14_029_467_366_897_019_727; pub const PRIME_3: u64 = 1_609_587_929_392_839_161; pub const PRIME_4: u64 = 9_650_029_242_287_828_579; pub const PRIME_5: u64 = 2_870_177_450_012_600_261; #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] #[derive(Copy, Clone, PartialEq)] struct XxCore { v1: u64, v2: u64, v3: u64, v4: u64, } /// Calculates the 64-bit hash. 
#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] #[derive(Debug, Copy, Clone, PartialEq)] pub struct XxHash64 { total_len: u64, seed: u64, core: XxCore, #[cfg_attr(feature = "serialize", serde(flatten))] buffer: Buffer, } impl XxCore { fn with_seed(seed: u64) -> XxCore { XxCore { v1: seed.wrapping_add(PRIME_1).wrapping_add(PRIME_2), v2: seed.wrapping_add(PRIME_2), v3: seed, v4: seed.wrapping_sub(PRIME_1), } } #[inline(always)] fn ingest_chunks(&mut self, values: I) where I: IntoIterator, { #[inline(always)] fn ingest_one_number(mut current_value: u64, mut value: u64) -> u64 { value = value.wrapping_mul(PRIME_2); current_value = current_value.wrapping_add(value); current_value = current_value.rotate_left(31); current_value.wrapping_mul(PRIME_1) } // By drawing these out, we can avoid going back and forth to // memory. It only really helps for large files, when we need // to iterate multiple times here. let mut v1 = self.v1; let mut v2 = self.v2; let mut v3 = self.v3; let mut v4 = self.v4; for [n1, n2, n3, n4] in values { v1 = ingest_one_number(v1, n1.to_le()); v2 = ingest_one_number(v2, n2.to_le()); v3 = ingest_one_number(v3, n3.to_le()); v4 = ingest_one_number(v4, n4.to_le()); } self.v1 = v1; self.v2 = v2; self.v3 = v3; self.v4 = v4; } #[inline(always)] fn finish(&self) -> u64 { // The original code pulls out local vars for v[1234] // here. Performance tests did not show that to be effective // here, presumably because this method is not called in a // tight loop. 
#[allow(unknown_lints, clippy::needless_late_init)] // keeping things parallel let mut hash; hash = self.v1.rotate_left(1); hash = hash.wrapping_add(self.v2.rotate_left(7)); hash = hash.wrapping_add(self.v3.rotate_left(12)); hash = hash.wrapping_add(self.v4.rotate_left(18)); #[inline(always)] fn mix_one(mut hash: u64, mut value: u64) -> u64 { value = value.wrapping_mul(PRIME_2); value = value.rotate_left(31); value = value.wrapping_mul(PRIME_1); hash ^= value; hash = hash.wrapping_mul(PRIME_1); hash.wrapping_add(PRIME_4) } hash = mix_one(hash, self.v1); hash = mix_one(hash, self.v2); hash = mix_one(hash, self.v3); hash = mix_one(hash, self.v4); hash } } impl core::fmt::Debug for XxCore { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> { write!( f, "XxCore {{ {:016x} {:016x} {:016x} {:016x} }}", self.v1, self.v2, self.v3, self.v4 ) } } #[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] #[derive(Debug, Copy, Clone, Default, PartialEq)] #[repr(align(8))] #[cfg_attr(feature = "serialize", serde(transparent))] struct AlignToU64(T); #[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] #[derive(Debug, Copy, Clone, Default, PartialEq)] struct Buffer { #[cfg_attr(feature = "serialize", serde(rename = "buffer"))] data: AlignToU64<[u8; CHUNK_SIZE]>, #[cfg_attr(feature = "serialize", serde(rename = "buffer_usage"))] len: usize, } impl Buffer { fn data(&self) -> &[u8] { &self.data.0[..self.len] } /// Consumes as much of the parameter as it can, returning the unused part. 
fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { let to_use = cmp::min(self.available(), data.len()); let (data, remaining) = data.split_at(to_use); self.data.0[self.len..][..to_use].copy_from_slice(data); self.len += to_use; remaining } fn set_data(&mut self, data: &[u8]) { debug_assert!(self.is_empty()); debug_assert!(data.len() < CHUNK_SIZE); self.data.0[..data.len()].copy_from_slice(data); self.len = data.len(); } fn available(&self) -> usize { CHUNK_SIZE - self.len } fn is_empty(&self) -> bool { self.len == 0 } fn is_full(&self) -> bool { self.len == CHUNK_SIZE } } impl XxHash64 { /// Constructs the hash with an initial seed pub fn with_seed(seed: u64) -> XxHash64 { XxHash64 { total_len: 0, seed, core: XxCore::with_seed(seed), buffer: Buffer::default(), } } pub(crate) fn write(&mut self, bytes: &[u8]) { let remaining = self.maybe_consume_bytes(bytes); if !remaining.is_empty() { let mut remaining = UnalignedBuffer::new(remaining); self.core.ingest_chunks(&mut remaining); self.buffer.set_data(remaining.remaining()); } self.total_len += bytes.len() as u64; } // Consume bytes and try to make `self.buffer` empty. // If there are not enough bytes, `self.buffer` can be non-empty, and this // function returns an empty slice. 
fn maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { if self.buffer.is_empty() { data } else { let data = self.buffer.consume(data); if self.buffer.is_full() { let mut u64s = UnalignedBuffer::new(self.buffer.data()); self.core.ingest_chunks(&mut u64s); debug_assert!(u64s.remaining().is_empty()); self.buffer.len = 0; } data } } pub(crate) fn finish(&self) -> u64 { let mut hash = if self.total_len >= CHUNK_SIZE as u64 { // We have processed at least one full chunk self.core.finish() } else { self.seed.wrapping_add(PRIME_5) }; hash = hash.wrapping_add(self.total_len); let mut buffered_u64s = UnalignedBuffer::::new(self.buffer.data()); for buffered_u64 in &mut buffered_u64s { let mut k1 = buffered_u64.to_le().wrapping_mul(PRIME_2); k1 = k1.rotate_left(31); k1 = k1.wrapping_mul(PRIME_1); hash ^= k1; hash = hash.rotate_left(27); hash = hash.wrapping_mul(PRIME_1); hash = hash.wrapping_add(PRIME_4); } let mut buffered_u32s = UnalignedBuffer::::new(buffered_u64s.remaining()); for buffered_u32 in &mut buffered_u32s { let k1 = u64::from(buffered_u32.to_le()).wrapping_mul(PRIME_1); hash ^= k1; hash = hash.rotate_left(23); hash = hash.wrapping_mul(PRIME_2); hash = hash.wrapping_add(PRIME_3); } let buffered_u8s = buffered_u32s.remaining(); for &buffered_u8 in buffered_u8s { let k1 = u64::from(buffered_u8).wrapping_mul(PRIME_5); hash ^= k1; hash = hash.rotate_left(11); hash = hash.wrapping_mul(PRIME_1); } // The final intermixing hash ^= hash >> 33; hash = hash.wrapping_mul(PRIME_2); hash ^= hash >> 29; hash = hash.wrapping_mul(PRIME_3); hash ^= hash >> 32; hash } pub fn seed(&self) -> u64 { self.seed } pub fn total_len(&self) -> u64 { self.total_len } } impl Default for XxHash64 { fn default() -> XxHash64 { XxHash64::with_seed(0) } } impl Hasher for XxHash64 { fn finish(&self) -> u64 { XxHash64::finish(self) } fn write(&mut self, bytes: &[u8]) { XxHash64::write(self, bytes) } } #[cfg(feature = "std")] pub use crate::std_support::sixty_four::RandomXxHashBuilder64; 
#[cfg(test)] mod test { use super::{RandomXxHashBuilder64, XxHash64}; use std::collections::HashMap; use std::hash::BuildHasherDefault; use std::prelude::v1::*; #[test] fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { let bytes: Vec<_> = (0..32).map(|_| 0).collect(); let mut byte_by_byte = XxHash64::with_seed(0); for byte in bytes.chunks(1) { byte_by_byte.write(byte); } let mut one_chunk = XxHash64::with_seed(0); one_chunk.write(&bytes); assert_eq!(byte_by_byte.core, one_chunk.core); } #[test] fn hash_of_nothing_matches_c_implementation() { let mut hasher = XxHash64::with_seed(0); hasher.write(&[]); assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999); } #[test] fn hash_of_single_byte_matches_c_implementation() { let mut hasher = XxHash64::with_seed(0); hasher.write(&[42]); assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4); } #[test] fn hash_of_multiple_bytes_matches_c_implementation() { let mut hasher = XxHash64::with_seed(0); hasher.write(b"Hello, world!\0"); assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f); } #[test] fn hash_of_multiple_chunks_matches_c_implementation() { let bytes: Vec<_> = (0..100).collect(); let mut hasher = XxHash64::with_seed(0); hasher.write(&bytes); assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597); } #[test] fn hash_with_different_seed_matches_c_implementation() { let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); hasher.write(&[]); assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672); } #[test] fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { let bytes: Vec<_> = (0..100).collect(); let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); hasher.write(&bytes); assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1); } #[test] fn can_be_used_in_a_hashmap_with_a_default_seed() { let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); } #[test] fn can_be_used_in_a_hashmap_with_a_random_seed() { let mut 
hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); } #[cfg(feature = "serialize")] type TestResult = Result>; #[cfg(feature = "serialize")] #[test] fn test_serialization_cycle() -> TestResult { let mut hasher = XxHash64::with_seed(0); hasher.write(b"Hello, world!\0"); hasher.finish(); let serialized = serde_json::to_string(&hasher)?; let unserialized: XxHash64 = serde_json::from_str(&serialized)?; assert_eq!(hasher, unserialized); Ok(()) } #[cfg(feature = "serialize")] #[test] fn test_serialization_stability() -> TestResult { let mut hasher = XxHash64::with_seed(0); hasher.write(b"Hello, world!\0"); hasher.finish(); let serialized = r#"{ "total_len": 14, "seed": 0, "core": { "v1": 6983438078262162902, "v2": 14029467366897019727, "v3": 0, "v4": 7046029288634856825 }, "buffer": [ 72, 101, 108, 108, 111, 44, 32, 119, 111, 114, 108, 100, 33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "buffer_usage": 14 }"#; let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap(); assert_eq!(hasher, unserialized); Ok(()) } } twox-hash-1.6.3/src/std_support.rs000064400000000000000000000055030072674642500153330ustar 00000000000000pub mod sixty_four { use crate::XxHash64; use core::hash::BuildHasher; use rand::{self, Rng}; #[derive(Clone)] /// Constructs a randomized seed and reuses it for multiple hasher instances. 
pub struct RandomXxHashBuilder64(u64);

    impl RandomXxHashBuilder64 {
        /// Picks a seed from `rand::thread_rng()` and stores it in the builder.
        fn new() -> Self {
            let seed = rand::thread_rng().gen();
            Self(seed)
        }
    }

    impl Default for RandomXxHashBuilder64 {
        /// Every default-constructed builder draws its own seed.
        fn default() -> Self {
            Self::new()
        }
    }

    impl BuildHasher for RandomXxHashBuilder64 {
        type Hasher = XxHash64;

        /// Creates a hasher seeded with the value stored in this builder.
        fn build_hasher(&self) -> Self::Hasher {
            let Self(seed) = *self;
            XxHash64::with_seed(seed)
        }
    }
}

pub mod thirty_two {
    use crate::XxHash32;
    use core::hash::BuildHasher;
    use rand::{self, Rng};

    #[derive(Clone)]
    /// Constructs a randomized seed and reuses it for multiple hasher instances.
    /// See the usage warning on `XxHash32`.
    pub struct RandomXxHashBuilder32(u32);

    impl RandomXxHashBuilder32 {
        /// Picks a seed from `rand::thread_rng()` and stores it in the builder.
        fn new() -> Self {
            let seed = rand::thread_rng().gen();
            Self(seed)
        }
    }

    impl Default for RandomXxHashBuilder32 {
        /// Every default-constructed builder draws its own seed.
        fn default() -> Self {
            Self::new()
        }
    }

    impl BuildHasher for RandomXxHashBuilder32 {
        type Hasher = XxHash32;

        /// Creates a hasher seeded with the value stored in this builder.
        fn build_hasher(&self) -> Self::Hasher {
            let Self(seed) = *self;
            XxHash32::with_seed(seed)
        }
    }
}

pub mod xxh3 {
    use crate::xxh3::{Hash128, Hash64};
    use core::hash::BuildHasher;
    use rand::{self, Rng};

    #[derive(Clone)]
    /// Constructs a randomized seed and reuses it for multiple hasher instances.
    pub struct RandomHashBuilder64(u64);

    impl RandomHashBuilder64 {
        /// Picks a seed from `rand::thread_rng()` and stores it in the builder.
        fn new() -> Self {
            let seed = rand::thread_rng().gen();
            Self(seed)
        }
    }

    impl Default for RandomHashBuilder64 {
        /// Every default-constructed builder draws its own seed.
        fn default() -> Self {
            Self::new()
        }
    }

    impl BuildHasher for RandomHashBuilder64 {
        type Hasher = Hash64;

        /// Creates a hasher seeded with the value stored in this builder.
        fn build_hasher(&self) -> Self::Hasher {
            let Self(seed) = *self;
            Hash64::with_seed(seed)
        }
    }

    #[derive(Clone)]
    /// Constructs a randomized seed and reuses it for multiple hasher instances.
pub struct RandomHashBuilder128(u64); impl RandomHashBuilder128 { fn new() -> RandomHashBuilder128 { RandomHashBuilder128(rand::thread_rng().gen()) } } impl Default for RandomHashBuilder128 { fn default() -> RandomHashBuilder128 { RandomHashBuilder128::new() } } impl BuildHasher for RandomHashBuilder128 { type Hasher = Hash128; fn build_hasher(&self) -> Hash128 { Hash128::with_seed(self.0) } } } twox-hash-1.6.3/src/thirty_two.rs000064400000000000000000000306030072674642500151600ustar 00000000000000use crate::UnalignedBuffer; use core::{cmp, hash::Hasher}; #[cfg(feature = "serialize")] use serde::{Deserialize, Serialize}; const CHUNK_SIZE: usize = 16; pub const PRIME_1: u32 = 2_654_435_761; pub const PRIME_2: u32 = 2_246_822_519; pub const PRIME_3: u32 = 3_266_489_917; pub const PRIME_4: u32 = 668_265_263; pub const PRIME_5: u32 = 374_761_393; #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] #[derive(Copy, Clone, PartialEq)] struct XxCore { v1: u32, v2: u32, v3: u32, v4: u32, } /// Calculates the 32-bit hash. Care should be taken when using this /// hash. /// /// Although this struct implements `Hasher`, it only calculates a /// 32-bit number, leaving the upper bits as 0. This means it is /// unlikely to be correct to use this in places like a `HashMap`. 
#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct XxHash32 {
    // Number of bytes written so far; kept as u64 even for the 32-bit hash.
    total_len: u64,
    // Seed supplied at construction time.
    seed: u32,
    // The four running accumulators.
    core: XxCore,
    // Bytes that have not yet formed a complete chunk.
    #[cfg_attr(feature = "serialize", serde(flatten))]
    buffer: Buffer,
}

impl XxCore {
    /// Derives the four initial accumulators from `seed`.
    fn with_seed(seed: u32) -> XxCore {
        XxCore {
            v1: seed.wrapping_add(PRIME_1).wrapping_add(PRIME_2),
            v2: seed.wrapping_add(PRIME_2),
            v3: seed,
            v4: seed.wrapping_sub(PRIME_1),
        }
    }

    /// Folds every item yielded by `values` into the accumulators.
    ///
    /// Each item is a group of four `u32` words; word `k` of a group is
    /// mixed into accumulator `v{k+1}` after being passed through `to_le()`.
    #[inline(always)]
    fn ingest_chunks<I>(&mut self, values: I)
    where
        I: IntoIterator<Item = [u32; 4]>,
    {
        // One accumulator step: multiply the input, add, rotate, multiply.
        #[inline(always)]
        fn ingest_one_number(mut current_value: u32, mut value: u32) -> u32 {
            value = value.wrapping_mul(PRIME_2);
            current_value = current_value.wrapping_add(value);
            current_value = current_value.rotate_left(13);
            current_value.wrapping_mul(PRIME_1)
        }

        // By drawing these out, we can avoid going back and forth to
        // memory. It only really helps for large files, when we need
        // to iterate multiple times here.

        let mut v1 = self.v1;
        let mut v2 = self.v2;
        let mut v3 = self.v3;
        let mut v4 = self.v4;

        for [n1, n2, n3, n4] in values {
            v1 = ingest_one_number(v1, n1.to_le());
            v2 = ingest_one_number(v2, n2.to_le());
            v3 = ingest_one_number(v3, n3.to_le());
            v4 = ingest_one_number(v4, n4.to_le());
        }

        self.v1 = v1;
        self.v2 = v2;
        self.v3 = v3;
        self.v4 = v4;
    }

    /// Combines the four accumulators into a single `u32` by rotating each
    /// one by a different amount and summing with wrapping addition.
    #[inline(always)]
    fn finish(&self) -> u32 {
        // The original code pulls out local vars for v[1234]
        // here. Performance tests did not show that to be effective
        // here, presumably because this method is not called in a
        // tight loop.

        #[allow(unknown_lints, clippy::needless_late_init)] // keeping things parallel
        let mut hash;

        hash = self.v1.rotate_left(1);
        hash = hash.wrapping_add(self.v2.rotate_left(7));
        hash = hash.wrapping_add(self.v3.rotate_left(12));
        hash = hash.wrapping_add(self.v4.rotate_left(18));

        hash
    }
}

impl core::fmt::Debug for XxCore {
    /// Prints the four accumulators as zero-padded hexadecimal numbers.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> {
        // NOTE(review): the 16-digit width looks like it mirrors the 64-bit
        // variant; `u32` values need only 8 digits. Left as is so the Debug
        // output does not change -- confirm whether narrowing is wanted.
        write!(
            f,
            "XxCore {{ {:016x} {:016x} {:016x} {:016x} }}",
            self.v1, self.v2, self.v3, self.v4
        )
    }
}

// Wrapper that forces 4-byte alignment on whatever it contains.
#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
#[derive(Debug, Copy, Clone, Default, PartialEq)]
#[repr(align(4))]
#[cfg_attr(feature = "serialize", serde(transparent))]
struct AlignToU32<T>(T);

// Fixed-size staging area for bytes that do not yet fill a whole chunk.
#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
#[derive(Debug, Copy, Clone, Default, PartialEq)]
struct Buffer {
    // Backing storage; only the first `len` bytes are meaningful.
    #[cfg_attr(feature = "serialize", serde(rename = "buffer"))]
    data: AlignToU32<[u8; CHUNK_SIZE]>,
    // Number of valid bytes at the front of `data`.
    #[cfg_attr(feature = "serialize", serde(rename = "buffer_usage"))]
    len: usize,
}

impl Buffer {
    /// Returns the bytes currently held in the buffer.
    fn data(&self) -> &[u8] {
        &self.data.0[..self.len]
    }

    /// Consumes as much of the parameter as it can, returning the unused part.
fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
        // Never take more than the free space left in the chunk buffer.
        let take = cmp::min(self.available(), data.len());
        let (head, tail) = data.split_at(take);

        // Append `head` directly after the bytes that are already buffered.
        let start = self.len;
        self.data.0[start..start + take].copy_from_slice(head);
        self.len = start + take;

        tail
    }

    /// Replaces the contents of an empty buffer with `data`.
    ///
    /// Debug builds assert that the buffer is currently empty and that
    /// `data` is shorter than one full chunk.
    fn set_data(&mut self, data: &[u8]) {
        debug_assert!(self.is_empty());
        debug_assert!(data.len() < CHUNK_SIZE);

        let count = data.len();
        self.data.0[..count].copy_from_slice(data);
        self.len = count;
    }

    /// Number of bytes that can still be stored before the buffer is full.
    fn available(&self) -> usize {
        CHUNK_SIZE - self.len
    }

    /// `true` when no bytes are buffered.
    fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// `true` when exactly `CHUNK_SIZE` bytes are buffered.
    fn is_full(&self) -> bool {
        self.len == CHUNK_SIZE
    }
}

impl XxHash32 {
    /// Constructs the hash with an initial seed
    pub fn with_seed(seed: u32) -> XxHash32 {
        let core = XxCore::with_seed(seed);
        let buffer = Buffer::default();

        XxHash32 {
            total_len: 0,
            seed,
            core,
            buffer,
        }
    }

    /// Feeds `bytes` into the hasher state.
    ///
    /// Bytes are first offered to the internal buffer through
    /// `maybe_consume_bytes`. Whatever that call hands back is wrapped in an
    /// `UnalignedBuffer`, passed to the core's `ingest_chunks`, and the part
    /// the core left over is stored in the buffer for a later call.
    pub(crate) fn write(&mut self, bytes: &[u8]) {
        let leftover = self.maybe_consume_bytes(bytes);

        if !leftover.is_empty() {
            let mut chunks = UnalignedBuffer::new(leftover);
            self.core.ingest_chunks(&mut chunks);
            self.buffer.set_data(chunks.remaining());
        }

        // The running length counts every byte passed in, buffered or not.
        self.total_len += bytes.len() as u64;
    }

    // Consume bytes and try to make `self.buffer` empty.
    // If there are not enough bytes, `self.buffer` can be non-empty, and this
    // function returns an empty slice.
fn maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { if self.buffer.is_empty() { data } else { let data = self.buffer.consume(data); if self.buffer.is_full() { let mut u32s = UnalignedBuffer::new(self.buffer.data()); self.core.ingest_chunks(&mut u32s); debug_assert!(u32s.remaining().is_empty()); self.buffer.len = 0; } data } } pub(crate) fn finish(&self) -> u32 { let mut hash = if self.total_len >= CHUNK_SIZE as u64 { // We have processed at least one full chunk self.core.finish() } else { self.seed.wrapping_add(PRIME_5) }; hash = hash.wrapping_add(self.total_len as u32); let mut buffered_u32s = UnalignedBuffer::::new(self.buffer.data()); for buffered_u32 in &mut buffered_u32s { let k1 = buffered_u32.to_le().wrapping_mul(PRIME_3); hash = hash.wrapping_add(k1); hash = hash.rotate_left(17); hash = hash.wrapping_mul(PRIME_4); } let buffered_u8s = buffered_u32s.remaining(); for &buffered_u8 in buffered_u8s { let k1 = u32::from(buffered_u8).wrapping_mul(PRIME_5); hash = hash.wrapping_add(k1); hash = hash.rotate_left(11); hash = hash.wrapping_mul(PRIME_1); } // The final intermixing hash ^= hash >> 15; hash = hash.wrapping_mul(PRIME_2); hash ^= hash >> 13; hash = hash.wrapping_mul(PRIME_3); hash ^= hash >> 16; hash } pub fn seed(&self) -> u32 { self.seed } /// Get the total number of bytes hashed, truncated to 32 bits. /// For the full 64-bit byte count, use `total_len_64` pub fn total_len(&self) -> u32 { self.total_len as u32 } /// Get the total number of bytes hashed. 
pub fn total_len_64(&self) -> u64 { self.total_len } } impl Default for XxHash32 { fn default() -> XxHash32 { XxHash32::with_seed(0) } } impl Hasher for XxHash32 { fn finish(&self) -> u64 { u64::from(XxHash32::finish(self)) } fn write(&mut self, bytes: &[u8]) { XxHash32::write(self, bytes) } } #[cfg(feature = "std")] pub use crate::std_support::thirty_two::RandomXxHashBuilder32; #[cfg(test)] mod test { use super::{RandomXxHashBuilder32, XxHash32}; use std::collections::HashMap; use std::hash::BuildHasherDefault; use std::prelude::v1::*; #[test] fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { let bytes: Vec<_> = (0..32).map(|_| 0).collect(); let mut byte_by_byte = XxHash32::with_seed(0); for byte in bytes.chunks(1) { byte_by_byte.write(byte); } let mut one_chunk = XxHash32::with_seed(0); one_chunk.write(&bytes); assert_eq!(byte_by_byte.core, one_chunk.core); } #[test] fn hash_of_nothing_matches_c_implementation() { let mut hasher = XxHash32::with_seed(0); hasher.write(&[]); assert_eq!(hasher.finish(), 0x02cc_5d05); } #[test] fn hash_of_single_byte_matches_c_implementation() { let mut hasher = XxHash32::with_seed(0); hasher.write(&[42]); assert_eq!(hasher.finish(), 0xe0fe_705f); } #[test] fn hash_of_multiple_bytes_matches_c_implementation() { let mut hasher = XxHash32::with_seed(0); hasher.write(b"Hello, world!\0"); assert_eq!(hasher.finish(), 0x9e5e_7e93); } #[test] fn hash_of_multiple_chunks_matches_c_implementation() { let bytes: Vec<_> = (0..100).collect(); let mut hasher = XxHash32::with_seed(0); hasher.write(&bytes); assert_eq!(hasher.finish(), 0x7f89_ba44); } #[test] fn hash_with_different_seed_matches_c_implementation() { let mut hasher = XxHash32::with_seed(0x42c9_1977); hasher.write(&[]); assert_eq!(hasher.finish(), 0xd6bf_8459); } #[test] fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { let bytes: Vec<_> = (0..100).collect(); let mut hasher = XxHash32::with_seed(0x42c9_1977); hasher.write(&bytes); 
assert_eq!(hasher.finish(), 0x6d2f_6c17); } #[test] fn can_be_used_in_a_hashmap_with_a_default_seed() { let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); } #[test] fn can_be_used_in_a_hashmap_with_a_random_seed() { let mut hash: HashMap<_, _, RandomXxHashBuilder32> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); } #[cfg(feature = "serialize")] type TestResult = Result>; #[cfg(feature = "serialize")] #[test] fn test_serialization_cycle() -> TestResult { let mut hasher = XxHash32::with_seed(0); hasher.write(b"Hello, world!\0"); hasher.finish(); let serialized = serde_json::to_string(&hasher)?; let unserialized: XxHash32 = serde_json::from_str(&serialized)?; assert_eq!(hasher, unserialized); Ok(()) } #[cfg(feature = "serialize")] #[test] fn test_serialization_stability() -> TestResult { let mut hasher = XxHash32::with_seed(0); hasher.write(b"Hello, world!\0"); hasher.finish(); let serialized = r#"{ "total_len": 14, "seed": 0, "core": { "v1": 606290984, "v2": 2246822519, "v3": 0, "v4": 1640531535 }, "buffer": [ 72, 101, 108, 108, 111, 44, 32, 119, 111, 114, 108, 100, 33, 0, 0, 0 ], "buffer_usage": 14 }"#; let unserialized: XxHash32 = serde_json::from_str(serialized).unwrap(); assert_eq!(hasher, unserialized); Ok(()) } // This test validates wraparound/truncation behavior for very large inputs // of a 32-bit hash, but runs very slowly in the normal "cargo test" // build config since it hashes 4.3GB of data. It runs reasonably quick // under "cargo test --release". /* #[test] fn len_overflow_32bit() { // Hash 4.3 billion (4_300_000_000) bytes, which overflows a u32. 
let bytes200: Vec = (0..200).collect(); let mut hasher = XxHash32::with_seed(0); for _ in 0..(4_300_000_000u64 / 200u64) { hasher.write(&bytes200); } assert_eq!(hasher.total_len_64(), 0x0000_0001_004c_cb00); assert_eq!(hasher.total_len(), 0x004c_cb00); // retult is tested against the C implementation assert_eq!(hasher.finish(), 0x1522_4ca7); } */ } twox-hash-1.6.3/src/xxh3.rs000064400000000000000000001515130072674642500136420ustar 00000000000000//! The in-progress XXH3 algorithm. //! //! Please read [the notes in original implementation][warning] to //! learn about when to use these algorithms. Specifically, the //! version of code this crate reproduces says: //! //! > The algorithm is currently in development, meaning its return //! values might still change in future versions. However, the API //! is stable, and can be used in production, typically for //! generation of ephemeral hashes (produced and consumed in same //! session). //! //! [warning]: https://github.com/Cyan4973/xxHash#new-hash-algorithms use alloc::vec::Vec; use core::convert::TryInto; use core::hash::Hasher; use core::mem; use core::ops::{Deref, DerefMut}; use core::slice; #[cfg(target_arch = "x86")] use core::arch::x86::*; #[cfg(target_arch = "x86_64")] use core::arch::x86_64::*; use cfg_if::cfg_if; use static_assertions::{const_assert, const_assert_eq}; #[cfg(feature = "serialize")] use serde::{Deserialize, Serialize}; use crate::sixty_four::{ PRIME_1 as PRIME64_1, PRIME_2 as PRIME64_2, PRIME_3 as PRIME64_3, PRIME_4 as PRIME64_4, PRIME_5 as PRIME64_5, }; use crate::thirty_two::{PRIME_1 as PRIME32_1, PRIME_2 as PRIME32_2, PRIME_3 as PRIME32_3}; #[cfg(feature = "std")] pub use crate::std_support::xxh3::{RandomHashBuilder128, RandomHashBuilder64}; #[inline(always)] pub fn hash64(data: &[u8]) -> u64 { hash64_with_seed(data, 0) } #[inline(always)] pub fn hash64_with_seed(data: &[u8], seed: u64) -> u64 { let len = data.len(); if len <= 16 { hash_len_0to16_64bits(data, len, &SECRET, seed) } else if 
len <= 128 { hash_len_17to128_64bits(data, len, &SECRET, seed) } else if len <= MIDSIZE_MAX { hash_len_129to240_64bits(data, len, &SECRET, seed) } else { hash_long_64bits_with_seed(data, len, seed) } } #[inline(always)] pub fn hash64_with_secret(data: &[u8], secret: &[u8]) -> u64 { debug_assert!(secret.len() >= SECRET_SIZE_MIN); let len = data.len(); if len <= 16 { hash_len_0to16_64bits(data, len, secret, 0) } else if len <= 128 { hash_len_17to128_64bits(data, len, secret, 0) } else if len <= MIDSIZE_MAX { hash_len_129to240_64bits(data, len, secret, 0) } else { hash_long_64bits_with_secret(data, len, secret) } } #[inline(always)] pub fn hash128(data: &[u8]) -> u128 { hash128_with_seed(data, 0) } #[inline(always)] pub fn hash128_with_seed(data: &[u8], seed: u64) -> u128 { let len = data.len(); if len <= 16 { hash_len_0to16_128bits(data, len, &SECRET, seed) } else if len <= 128 { hash_len_17to128_128bits(data, len, &SECRET, seed) } else if len <= MIDSIZE_MAX { hash_len_129to240_128bits(data, len, &SECRET, seed) } else { hash_long_128bits_with_seed(data, len, seed) } } #[inline(always)] pub fn hash128_with_secret(data: &[u8], secret: &[u8]) -> u128 { debug_assert!(secret.len() >= SECRET_SIZE_MIN); let len = data.len(); if len <= 16 { hash_len_0to16_128bits(data, len, secret, 0) } else if len <= 128 { hash_len_17to128_128bits(data, len, secret, 0) } else if len <= MIDSIZE_MAX { hash_len_129to240_128bits(data, len, secret, 0) } else { hash_long_128bits_with_secret(data, len, secret) } } /// Calculates the 64-bit hash. 
#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] #[derive(Clone, Default)] pub struct Hash64(State); impl Hash64 { pub fn with_seed(seed: u64) -> Self { Self(State::with_seed(seed)) } pub fn with_secret>>(secret: S) -> Self { Self(State::with_secret(secret)) } } impl Hasher for Hash64 { #[inline(always)] fn finish(&self) -> u64 { self.0.digest64() } #[inline(always)] fn write(&mut self, bytes: &[u8]) { self.0.update(bytes, AccWidth::Acc64Bits) } } /// Calculates the 128-bit hash. #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] #[derive(Clone, Default)] pub struct Hash128(State); impl Hash128 { pub fn with_seed(seed: u64) -> Self { Self(State::with_seed(seed)) } pub fn with_secret>>(secret: S) -> Self { Self(State::with_secret(secret)) } } impl Hasher for Hash128 { #[inline(always)] fn finish(&self) -> u64 { self.0.digest128() as u64 } #[inline(always)] fn write(&mut self, bytes: &[u8]) { self.0.update(bytes, AccWidth::Acc128Bits) } } pub trait HasherExt: Hasher { fn finish_ext(&self) -> u128; } impl HasherExt for Hash128 { #[inline(always)] fn finish_ext(&self) -> u128 { self.0.digest128() } } /* ========================================== * XXH3 default settings * ========================================== */ const SECRET_DEFAULT_SIZE: usize = 192; const SECRET_SIZE_MIN: usize = 136; const SECRET: Secret = Secret([ 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 
0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, ]); #[repr(align(64))] #[derive(Clone)] struct Secret([u8; SECRET_DEFAULT_SIZE]); const_assert_eq!(mem::size_of::() % 16, 0); impl Default for Secret { #[inline(always)] fn default() -> Self { SECRET } } impl Deref for Secret { type Target = [u8]; #[inline(always)] fn deref(&self) -> &Self::Target { &self.0[..] } } cfg_if! { if #[cfg(feature = "serialize")] { impl Serialize for Secret { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { serializer.serialize_bytes(self) } } impl<'de> Deserialize<'de> for Secret { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { deserializer.deserialize_bytes(SecretVisitor) } } struct SecretVisitor; impl<'de> serde::de::Visitor<'de> for SecretVisitor { type Value = Secret; fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result { formatter.write_str("secret with a bytes array") } fn visit_bytes(self, v: &[u8]) -> Result where E: serde::de::Error, { if v.len() == SECRET_DEFAULT_SIZE { let mut secret = [0; SECRET_DEFAULT_SIZE]; secret.copy_from_slice(v); Ok(Secret(secret)) } else { Err(E::custom("incomplete secret data")) } } } } } impl Secret { #[inline(always)] pub fn with_seed(seed: u64) -> Self { let mut secret = [0; SECRET_DEFAULT_SIZE]; for off in (0..SECRET_DEFAULT_SIZE).step_by(16) { secret[off..].write_u64_le(SECRET[off..].read_u64_le().wrapping_add(seed)); secret[off + 8..].write_u64_le(SECRET[off + 
8..].read_u64_le().wrapping_sub(seed)); } Secret(secret) } } cfg_if! { if #[cfg(target_feature = "avx2")] { #[repr(align(32))] #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] #[derive(Clone)] struct Acc([u64; ACC_NB]); } else if #[cfg(target_feature = "sse2")] { #[repr(align(16))] #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] #[derive(Clone)] struct Acc([u64; ACC_NB]); } else { #[repr(align(8))] #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] #[derive(Clone)] struct Acc([u64; ACC_NB]); } } const ACC_SIZE: usize = mem::size_of::(); const_assert_eq!(ACC_SIZE, 64); impl Default for Acc { #[inline(always)] fn default() -> Self { Acc([ u64::from(PRIME32_3), PRIME64_1, PRIME64_2, PRIME64_3, PRIME64_4, u64::from(PRIME32_2), PRIME64_5, u64::from(PRIME32_1), ]) } } impl Deref for Acc { type Target = [u64]; #[inline(always)] fn deref(&self) -> &Self::Target { &self.0 } } impl DerefMut for Acc { #[inline(always)] fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } } trait Buf { fn read_u32_le(&self) -> u32; fn read_u64_le(&self) -> u64; } trait BufMut { fn write_u32_le(&mut self, n: u32); fn write_u64_le(&mut self, n: u64); } impl Buf for [u8] { #[inline(always)] fn read_u32_le(&self) -> u32 { let buf = &self[..mem::size_of::()]; u32::from_le_bytes(buf.try_into().unwrap()) } #[inline(always)] fn read_u64_le(&self) -> u64 { let buf = &self[..mem::size_of::()]; u64::from_le_bytes(buf.try_into().unwrap()) } } impl BufMut for [u8] { #[inline(always)] fn write_u32_le(&mut self, n: u32) { self[..mem::size_of::()].copy_from_slice(&n.to_le_bytes()[..]); } #[inline(always)] fn write_u64_le(&mut self, n: u64) { self[..mem::size_of::()].copy_from_slice(&n.to_le_bytes()[..]); } } /* ========================================== * Short keys * ========================================== */ #[inline(always)] fn hash_len_0to16_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 { debug_assert!(len <= 16); if len > 8 { 
hash_len_9to16_64bits(data, len, key, seed) } else if len >= 4 { hash_len_4to8_64bits(data, len, key, seed) } else if len > 0 { hash_len_1to3_64bits(data, len, key, seed) } else { 0 } } #[inline(always)] fn hash_len_9to16_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 { debug_assert!((9..=16).contains(&len)); let ll1 = data.read_u64_le() ^ key.read_u64_le().wrapping_add(seed); let ll2 = data[len - 8..].read_u64_le() ^ key[8..].read_u64_le().wrapping_sub(seed); let acc = (len as u64) .wrapping_add(ll1) .wrapping_add(ll2) .wrapping_add(mul128_fold64(ll1, ll2)); avalanche(acc) } #[inline(always)] fn hash_len_4to8_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 { debug_assert!((4..=8).contains(&len)); let in1 = u64::from(data.read_u32_le()); let in2 = u64::from(data[len - 4..].read_u32_le()); let in64 = in1.wrapping_add(in2 << 32); let keyed = in64 ^ key.read_u64_le().wrapping_add(seed); let mix64 = (len as u64).wrapping_add((keyed ^ (keyed >> 51)).wrapping_mul(u64::from(PRIME32_1))); avalanche((mix64 ^ (mix64 >> 47)).wrapping_mul(PRIME64_2)) } #[inline(always)] fn hash_len_1to3_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 { debug_assert!((1..=3).contains(&len)); let c1 = u32::from(data[0]); let c2 = u32::from(data[len >> 1]); let c3 = u32::from(data[len - 1]); let combined = c1 + (c2 << 8) + (c3 << 16) + ((len as u32) << 24); let keyed = u64::from(combined) ^ u64::from(key.read_u32_le()).wrapping_add(seed); let mixed = keyed.wrapping_mul(PRIME64_1); avalanche(mixed) } #[inline(always)] fn hash_len_17to128_64bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u64 { debug_assert!((17..=128).contains(&len)); debug_assert!(secret.len() >= SECRET_SIZE_MIN); let mut acc = PRIME64_1.wrapping_mul(len as u64); if len > 32 { if len > 64 { if len > 96 { acc = acc .wrapping_add(mix_16bytes(&data[48..], &secret[96..], seed)) .wrapping_add(mix_16bytes(&data[len - 64..], &secret[112..], seed)); } acc = acc 
.wrapping_add(mix_16bytes(&data[32..], &secret[64..], seed))
                .wrapping_add(mix_16bytes(&data[len - 48..], &secret[80..], seed));
        }

        acc = acc
            .wrapping_add(mix_16bytes(&data[16..], &secret[32..], seed))
            .wrapping_add(mix_16bytes(&data[len - 32..], &secret[48..], seed));
    }

    acc = acc
        .wrapping_add(mix_16bytes(data, secret, seed))
        .wrapping_add(mix_16bytes(&data[len - 16..], &secret[16..], seed));

    avalanche(acc)
}

/// Largest input length handled by the "mid-size" (129..=240 bytes) code path.
const MIDSIZE_MAX: usize = 240;
/// Secret offset applied to the lanes mixed after the first avalanche.
const MIDSIZE_STARTOFFSET: usize = 3;
/// Distance from `SECRET_SIZE_MIN` at which the key for the final 16 bytes starts.
const MIDSIZE_LASTOFFSET: usize = 17;

/// Hashes an input of 129 to `MIDSIZE_MAX` bytes into 64 bits.
///
/// The first eight 16-byte lanes are mixed against the start of `secret`
/// and avalanched; every further complete 16-byte lane is mixed against
/// the secret shifted by `MIDSIZE_STARTOFFSET`; finally the last 16 bytes
/// of the input are mixed in and the result is avalanched again.
///
/// `len` is expected to be `data.len()`; `secret` must be at least
/// `SECRET_SIZE_MIN` bytes (both only checked by `debug_assert!`).
#[inline(always)]
fn hash_len_129to240_64bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u64 {
    debug_assert!((129..=MIDSIZE_MAX).contains(&len));
    debug_assert!(secret.len() >= SECRET_SIZE_MIN);

    let acc = (len as u64).wrapping_mul(PRIME64_1);
    let acc = (0..8).fold(acc, |acc, i| {
        acc.wrapping_add(mix_16bytes(&data[16 * i..], &secret[16 * i..], seed))
    });
    let acc = avalanche(acc);

    let nb_rounds = len / 16;
    debug_assert!(nb_rounds >= 8);

    let acc = (8..nb_rounds).fold(acc, |acc, i| {
        acc.wrapping_add(mix_16bytes(
            &data[16 * i..],
            &secret[16 * (i - 8) + MIDSIZE_STARTOFFSET..],
            seed,
        ))
    });

    // last bytes
    avalanche(acc.wrapping_add(mix_16bytes(
        &data[len - 16..],
        &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET..],
        seed,
    )))
}

/* ==========================================
 * Long keys
 * ========================================== */

/// Number of input bytes consumed by one call to `accumulate512`.
const STRIPE_LEN: usize = 64;
const SECRET_CONSUME_RATE: usize = 8; // nb of secret bytes consumed at each accumulation
const SECRET_MERGEACCS_START: usize = 11; // do not align on 8, so that secret is different from accumulator
const SECRET_LASTACC_START: usize = 7; // do not align on 8, so that secret is different from scrambler
/// Number of 64-bit accumulator lanes (one stripe worth of `u64`s).
const ACC_NB: usize = STRIPE_LEN / mem::size_of::<u64>();

/// Selects how the raw input is folded into the accumulators:
/// in-place for the 64-bit hash, lane-swapped for the 128-bit hash
/// (see the `accumulate512` implementations).
#[derive(Debug, Clone, Copy, PartialEq)]
pub(crate) enum AccWidth {
    Acc64Bits,
    Acc128Bits,
}

/// Long-input 64-bit hash using the built-in `SECRET`.
#[inline(always)]
fn hash_long_64bits_with_default_secret(data: &[u8], len: usize) -> u64 {
    hash_long_internal(data, len, &SECRET)
}

/// Long-input 64-bit hash using a caller-supplied secret.
#[inline(always)]
fn hash_long_64bits_with_secret(data: &[u8], len: usize, secret: &[u8]) -> u64 {
    hash_long_internal(data, len, secret)
}

/// Generate a custom key, based on alteration of default kSecret with the seed,
/// and then use this key for long mode hashing.
///
/// This operation is decently fast but nonetheless costs a little bit of time.
/// Try to avoid it whenever possible (typically when `seed == 0`, in which
/// case the default secret is used directly).
#[inline(always)]
fn hash_long_64bits_with_seed(data: &[u8], len: usize, seed: u64) -> u64 {
    if seed == 0 {
        hash_long_64bits_with_default_secret(data, len)
    } else {
        let secret = Secret::with_seed(seed);

        hash_long_internal(data, len, &secret)
    }
}

/// Runs the long-input accumulation loop in 64-bit mode and merges the
/// accumulators into the final 64-bit hash.
#[inline(always)]
fn hash_long_internal(data: &[u8], len: usize, secret: &[u8]) -> u64 {
    let mut acc = Acc::default();

    hash_long_internal_loop(&mut acc, data, len, secret, AccWidth::Acc64Bits);

    merge_accs(
        &acc,
        &secret[SECRET_MERGEACCS_START..],
        (len as u64).wrapping_mul(PRIME64_1),
    )
}

/// Folds the whole of `data` into `acc`.
///
/// The input is processed in blocks of `nb_rounds` stripes (as many as the
/// secret allows at `SECRET_CONSUME_RATE` bytes per stripe); after each full
/// block the accumulators are scrambled with the last `STRIPE_LEN` bytes of
/// the secret. The stripes of the trailing partial block are accumulated
/// without scrambling, and if `len` is not a multiple of `STRIPE_LEN` the
/// final `STRIPE_LEN` bytes of `data` (overlapping the previous stripe) are
/// accumulated with a dedicated secret offset.
///
/// `len` is expected to be `data.len()` and greater than `STRIPE_LEN`.
#[inline(always)]
fn hash_long_internal_loop(
    acc: &mut [u64],
    data: &[u8],
    len: usize,
    secret: &[u8],
    acc_width: AccWidth,
) {
    let secret_len = secret.len();
    let nb_rounds = (secret_len - STRIPE_LEN) / SECRET_CONSUME_RATE;
    let block_len = STRIPE_LEN * nb_rounds;

    debug_assert!(secret_len >= SECRET_SIZE_MIN);

    let mut chunks = data.chunks_exact(block_len);

    for chunk in &mut chunks {
        accumulate(acc, chunk, secret, nb_rounds, acc_width);
        unsafe {
            scramble_acc(acc, &secret[secret_len - STRIPE_LEN..]);
        }
    }

    /* last partial block */
    debug_assert!(len > STRIPE_LEN);

    let nb_stripes = (len % block_len) / STRIPE_LEN;

    debug_assert!(nb_stripes < (secret_len / SECRET_CONSUME_RATE));

    accumulate(acc, chunks.remainder(), secret, nb_stripes, acc_width);

    /* last stripe */
    if (len & (STRIPE_LEN - 1)) != 0 {
        unsafe {
            accumulate512(
                acc,
                &data[len - STRIPE_LEN..],
                &secret[secret_len - STRIPE_LEN - SECRET_LASTACC_START..],
                acc_width,
            );
        }
    }
}

/// Accumulates `nb_stripes` consecutive stripes of `data`, advancing the
/// secret by `SECRET_CONSUME_RATE` bytes for each stripe.
#[inline(always)]
fn accumulate(acc: &mut [u64], data: &[u8], secret: &[u8], nb_stripes: usize, acc_width: AccWidth) {
    for n in 0..nb_stripes {
        unsafe {
            accumulate512(
                acc,
                &data[n * STRIPE_LEN..],
                &secret[n * SECRET_CONSUME_RATE..],
                acc_width,
            );
        }
    }
}

/// Builds the 8-bit immediate used by the 32-bit lane shuffle intrinsics
/// from four 2-bit lane selectors (`z` is the most significant pair).
#[inline(always)]
const fn _mm_shuffle(z: u32, y: u32, x: u32, w: u32) -> i32 {
    ((z << 6) | (y << 4) | (x << 2) | w) as i32
}

/// AVX2 implementation of the stripe accumulator and scrambler.
#[cfg(target_feature = "avx2")]
mod avx2 {
    use super::*;

    /// Accumulates one `STRIPE_LEN`-byte stripe of `data` into `acc`, keyed
    /// by `keys`, two 256-bit vectors at a time.
    ///
    /// # Safety
    ///
    /// `data` and `keys` are read through raw pointers for `STRIPE_LEN`
    /// bytes without a bounds check, and `acc` is read/written as
    /// `__m256i` with `read`/`write`.
    /// NOTE(review): this presumably requires `acc` to be aligned for
    /// `__m256i` and all three slices to cover a full stripe; confirm
    /// against the callers and the definition of `Acc`.
    #[target_feature(enable = "avx2")]
    pub(crate) unsafe fn accumulate512(
        acc: &mut [u64],
        data: &[u8],
        keys: &[u8],
        acc_width: AccWidth,
    ) {
        let xacc = acc.as_mut_ptr() as *mut __m256i;
        let xdata = data.as_ptr() as *const __m256i;
        let xkey = keys.as_ptr() as *const __m256i;

        for i in 0..STRIPE_LEN / mem::size_of::<__m256i>() {
            let d = _mm256_loadu_si256(xdata.add(i));
            let k = _mm256_loadu_si256(xkey.add(i));
            let dk = _mm256_xor_si256(d, k); // uint32 dk[8] = {d0^k0, d1^k1, d2^k2, d3^k3, ...}
            let mul = _mm256_mul_epu32(dk, _mm256_shuffle_epi32(dk, 0x31)); // uint64 res[4] = {dk0*dk1, dk2*dk3, ...}

            xacc.add(i).write(if acc_width == AccWidth::Acc128Bits {
                // 128-bit mode: add the input with its 64-bit halves swapped
                let dswap = _mm256_shuffle_epi32(d, _mm_shuffle(1, 0, 3, 2));
                let add = _mm256_add_epi64(xacc.add(i).read(), dswap);
                _mm256_add_epi64(mul, add)
            } else {
                let add = _mm256_add_epi64(xacc.add(i).read(), d);
                _mm256_add_epi64(mul, add)
            })
        }
    }

    /// Scrambles the accumulators: `acc ^= acc >> 47`, xor with `key`, then
    /// multiply each 64-bit lane by `PRIME32_1` (built from two 32x32 products).
    ///
    /// # Safety
    ///
    /// Same raw-pointer access pattern as `accumulate512`: `key` is read for
    /// `STRIPE_LEN` bytes and `acc` is accessed as `__m256i`.
    #[target_feature(enable = "avx2")]
    pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) {
        let xacc = acc.as_mut_ptr() as *mut __m256i;
        let xkey = key.as_ptr() as *const __m256i;
        let prime32 = _mm256_set1_epi32(PRIME32_1 as i32);

        for i in 0..STRIPE_LEN / mem::size_of::<__m256i>() {
            let data = xacc.add(i).read();
            let shifted = _mm256_srli_epi64(data, 47);
            let data = _mm256_xor_si256(data, shifted);

            let k = _mm256_loadu_si256(xkey.add(i));
            let dk = _mm256_xor_si256(data, k); /* U32 dk[8] = {d0^k0, d1^k1, d2^k2, d3^k3, ...} */
            let dk1 = _mm256_mul_epu32(dk, prime32);

            let d2 = _mm256_shuffle_epi32(dk, 0x31);
            let dk2 = _mm256_mul_epu32(d2, prime32);
            let dk2h = _mm256_slli_epi64(dk2, 32);

            xacc.add(i).write(_mm256_add_epi64(dk1, dk2h));
        }
    }
}

/// SSE2 implementation of the stripe accumulator and scrambler
/// (used when AVX2 is not enabled).
#[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
mod sse2 {
    use super::*;

    /// Accumulates one `STRIPE_LEN`-byte stripe of `data` into `acc`, keyed
    /// by `keys`, four 128-bit vectors at a time.
    ///
    /// # Safety
    ///
    /// `data` and `keys` are read through raw pointers for `STRIPE_LEN`
    /// bytes without a bounds check, and `acc` is read/written as
    /// `__m128i` with `read`/`write`.
    /// NOTE(review): presumably requires `acc` to be aligned for `__m128i`;
    /// confirm against the definition of `Acc`.
    #[target_feature(enable = "sse2")]
    #[allow(clippy::cast_ptr_alignment)]
    pub(crate) unsafe fn accumulate512(
        acc: &mut [u64],
        data: &[u8],
        keys: &[u8],
        acc_width: AccWidth,
    ) {
        let xacc = acc.as_mut_ptr() as *mut __m128i;
        let xdata = data.as_ptr() as *const __m128i;
        let xkey = keys.as_ptr() as *const __m128i;

        for i in 0..STRIPE_LEN / mem::size_of::<__m128i>() {
            let d = _mm_loadu_si128(xdata.add(i));
            let k = _mm_loadu_si128(xkey.add(i));
            let dk = _mm_xor_si128(d, k); // uint32 dk[4] = {d0^k0, d1^k1, d2^k2, d3^k3}
            let mul = _mm_mul_epu32(dk, _mm_shuffle_epi32(dk, 0x31)); // uint64 res[2] = {dk0*dk1, dk2*dk3}

            xacc.add(i).write(if acc_width == AccWidth::Acc128Bits {
                // 128-bit mode: add the input with its 64-bit halves swapped
                let dswap = _mm_shuffle_epi32(d, _mm_shuffle(1, 0, 3, 2));
                let add = _mm_add_epi64(xacc.add(i).read(), dswap);
                _mm_add_epi64(mul, add)
            } else {
                let add = _mm_add_epi64(xacc.add(i).read(), d);
                _mm_add_epi64(mul, add)
            })
        }
    }

    /// Scrambles the accumulators: `acc ^= acc >> 47`, xor with `key`, then
    /// multiply each 64-bit lane by `PRIME32_1` (built from two 32x32 products).
    ///
    /// # Safety
    ///
    /// Same raw-pointer access pattern as `accumulate512`: `key` is read for
    /// `STRIPE_LEN` bytes and `acc` is accessed as `__m128i`.
    #[target_feature(enable = "sse2")]
    #[allow(clippy::cast_ptr_alignment)]
    pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) {
        let xacc = acc.as_mut_ptr() as *mut __m128i;
        let xkey = key.as_ptr() as *const __m128i;
        let prime32 = _mm_set1_epi32(PRIME32_1 as i32);

        for i in 0..STRIPE_LEN / mem::size_of::<__m128i>() {
            let data = xacc.add(i).read();
            let shifted = _mm_srli_epi64(data, 47);
            let data = _mm_xor_si128(data, shifted);

            let k = _mm_loadu_si128(xkey.add(i));
            let dk = _mm_xor_si128(data, k);

            let dk1 = _mm_mul_epu32(dk, prime32);

            let d2 = _mm_shuffle_epi32(dk, 0x31);
            let dk2 = _mm_mul_epu32(d2, prime32);
            let dk2h = _mm_slli_epi64(dk2, 32);

            xacc.add(i).write(_mm_add_epi64(dk1, dk2h));
        }
    }
}

/// Portable scalar implementation of the stripe accumulator and scrambler
/// (used when neither AVX2 nor SSE2 is enabled).
#[cfg(not(any(target_feature = "avx2", target_feature = "sse2")))]
mod generic {
    use super::*;

    /// Accumulates one stripe: for each pair of 64-bit lanes, xor input and
    /// key, add the product of the low and high 32-bit halves to the lane,
    /// then add the raw input (lane-swapped within the pair in 128-bit mode).
    ///
    /// Declared `unsafe` only to share a signature with the SIMD variants;
    /// all accesses here are bounds-checked slice operations.
    #[inline(always)]
    pub(crate) unsafe fn accumulate512(
        acc: &mut [u64],
        data: &[u8],
        key: &[u8],
        acc_width: AccWidth,
    ) {
        for i in (0..ACC_NB).step_by(2) {
            let in1 = data[8 * i..].read_u64_le();
            let in2 = data[8 * (i + 1)..].read_u64_le();
            let key1 = key[8 * i..].read_u64_le();
            let key2 = key[8 * (i + 1)..].read_u64_le();
            let data_key1 = key1 ^ in1;
            let data_key2 = key2 ^ in2;
            acc[i] = acc[i].wrapping_add(mul32_to64(data_key1, data_key1 >> 32));
            acc[i + 1] = acc[i + 1].wrapping_add(mul32_to64(data_key2, data_key2 >> 32));

            if acc_width == AccWidth::Acc128Bits {
                acc[i] = acc[i].wrapping_add(in2);
                acc[i + 1] = acc[i + 1].wrapping_add(in1);
            } else {
                acc[i] = acc[i].wrapping_add(in1);
                acc[i + 1] = acc[i + 1].wrapping_add(in2);
            }
        }
    }

    /// Multiplies the low 32 bits of `a` by the low 32 bits of `b`.
    #[inline(always)]
    fn mul32_to64(a: u64, b: u64) -> u64 {
        (a & 0xFFFFFFFF).wrapping_mul(b & 0xFFFFFFFF)
    }

    /// Scrambles each accumulator lane: `acc ^= acc >> 47`, xor with the
    /// matching 8 key bytes, multiply by `PRIME32_1`.
    #[inline(always)]
    pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) {
        for i in 0..ACC_NB {
            let key64 = key[8 * i..].read_u64_le();
            let mut acc64 = acc[i];
            acc64 ^= acc64 >> 47;
            acc64 ^= key64;
            acc64 = acc64.wrapping_mul(u64::from(PRIME32_1));
            acc[i] = acc64;
        }
    }
}

// Pick the widest implementation enabled at compile time.
cfg_if! {
    if #[cfg(target_feature = "avx2")] {
        use avx2::{accumulate512, scramble_acc};
    } else if #[cfg(target_feature = "sse2")] {
        use sse2::{accumulate512, scramble_acc};
    } else {
        use generic::{accumulate512, scramble_acc};
    }
}

/// Folds the eight accumulator lanes (as four pairs, each keyed with 16
/// bytes of `secret`) into `start` and avalanches the sum.
#[inline(always)]
fn merge_accs(acc: &[u64], secret: &[u8], start: u64) -> u64 {
    avalanche(
        start
            .wrapping_add(mix2accs(acc, secret))
            .wrapping_add(mix2accs(&acc[2..], &secret[16..]))
            .wrapping_add(mix2accs(&acc[4..], &secret[32..]))
            .wrapping_add(mix2accs(&acc[6..], &secret[48..])),
    )
}

/// Xors the first two lanes of `acc` with the first 16 bytes of `secret`
/// and folds their 128-bit product to 64 bits.
#[inline(always)]
fn mix2accs(acc: &[u64], secret: &[u8]) -> u64 {
    mul128_fold64(
        acc[0] ^ secret.read_u64_le(),
        acc[1] ^ secret[8..].read_u64_le(),
    )
}

/// Mixes the first 16 bytes of `data` with the first 16 bytes of `key`:
/// the low word is xored with `key + seed`, the high word with
/// `key - seed`, and the 128-bit product is folded to 64 bits.
#[inline(always)]
fn mix_16bytes(data: &[u8], key: &[u8], seed: u64) -> u64 {
    let ll1 = data.read_u64_le();
    let ll2 = data[8..].read_u64_le();

    mul128_fold64(
        ll1 ^ key.read_u64_le().wrapping_add(seed),
        ll2 ^ key[8..].read_u64_le().wrapping_sub(seed),
    )
}

/// Computes the full 128-bit product of the operands and xors its low and
/// high 64-bit halves together.
#[inline(always)]
fn mul128_fold64(ll1: u64, ll2: u64) -> u64 {
    let lll = u128::from(ll1).wrapping_mul(u128::from(ll2));

    (lll as u64) ^ ((lll >> 64) as u64)
}

/// Final bit mixer: xor-shift by 37, multiply by `PRIME64_3`, xor-shift by 32.
#[inline(always)]
fn avalanche(mut h64: u64) -> u64 {
    h64 ^= h64 >> 37;
    h64 = h64.wrapping_mul(PRIME64_3);
    h64 ^ (h64 >> 32)
}

/* === XXH3 streaming === */

/// Size in bytes of the streaming state's staging buffer.
const INTERNAL_BUFFER_SIZE: usize = 256;
/// Number of whole stripes held by a full staging buffer.
const INTERNAL_BUFFER_STRIPES: usize = INTERNAL_BUFFER_SIZE / STRIPE_LEN;

const_assert!(INTERNAL_BUFFER_SIZE >= MIDSIZE_MAX);
const_assert_eq!(INTERNAL_BUFFER_SIZE % STRIPE_LEN, 0);

/// Streaming hasher state shared by the 64-bit and 128-bit variants.
#[repr(align(64))]
#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
#[derive(Clone)]
struct State {
    /// Running accumulators.
    acc: Acc,
    /// Secret in use (default, seed-derived, or caller-supplied).
    secret: With,
    /// Staging buffer for input that has not been consumed yet.
    buf: Vec<u8>,
    seed: u64,
    /// Total number of bytes passed to `update` so far.
    total_len: usize,
    /// Stripes already accumulated in the current block (since the last scramble).
    nb_stripes_so_far: usize,
}

/// The secret backing a `State`; dereferences to its bytes.
#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
#[derive(Clone)]
enum With {
    Default(Secret),
    Custom(Secret),
    Ref(Vec<u8>),
}

impl Deref for With {
    type Target = [u8];

    fn deref(&self) -> &Self::Target {
        match self {
            With::Default(secret) | With::Custom(secret) => &secret.0[..],
            With::Ref(secret) => secret,
        }
    }
}

impl Default for State {
    fn default() -> Self {
        Self::new(0, With::Default(Secret::default()))
    }
}

impl State {
    /// Creates an empty state with the given seed and secret.
    fn new(seed: u64, secret: With) -> Self {
        State {
            acc: Acc::default(),
            secret,
            buf: Vec::with_capacity(INTERNAL_BUFFER_SIZE),
            seed,
            total_len: 0,
            nb_stripes_so_far: 0,
        }
    }

    /// Creates a state whose secret is derived from `seed`.
    fn with_seed(seed: u64) -> Self {
        Self::new(seed, With::Custom(Secret::with_seed(seed)))
    }

    /// Creates a seedless state using a caller-supplied secret, which must
    /// be at least `SECRET_SIZE_MIN` bytes (checked by `debug_assert!` only).
    fn with_secret<S: Into<Vec<u8>>>(secret: S) -> State {
        let secret = secret.into();

        debug_assert!(secret.len() >= SECRET_SIZE_MIN);

        Self::new(0, With::Ref(secret))
    }

    /// Offset of the last `STRIPE_LEN` bytes of the secret (used for scrambling).
    #[inline(always)]
    fn secret_limit(&self) -> usize {
        self.secret.len() - STRIPE_LEN
    }

    /// Number of stripes accumulated between two scrambles.
    #[inline(always)]
    fn nb_stripes_per_block(&self) -> usize {
        self.secret_limit() / SECRET_CONSUME_RATE
    }

    /// Feeds `input` into the state.
    ///
    /// Input that fits in the staging buffer is only buffered. Otherwise the
    /// buffer is topped up and consumed, the rest of the input is consumed
    /// directly in `INTERNAL_BUFFER_SIZE` chunks, and the remainder is
    /// buffered.
    ///
    /// NOTE(review): the fill threshold is `self.buf.capacity()`, while the
    /// consumption step always processes `INTERNAL_BUFFER_STRIPES` stripes.
    /// This assumes the capacity is exactly `INTERNAL_BUFFER_SIZE`, which
    /// `Vec::with_capacity` only guarantees as a lower bound and which a
    /// cloned or deserialized `Vec` presumably does not preserve; confirm.
    #[inline(always)]
    fn update(&mut self, mut input: &[u8], acc_width: AccWidth) {
        let len = input.len();

        if len == 0 {
            return;
        }

        self.total_len += len;

        if self.buf.len() + len <= self.buf.capacity() {
            self.buf.extend_from_slice(input);
            return;
        }

        let nb_stripes_per_block = self.nb_stripes_per_block();
        let secret_limit = self.secret_limit();

        if !self.buf.is_empty() {
            // some data within internal buffer: fill then consume it
            let (load, rest) = input.split_at(self.buf.capacity() - self.buf.len());
            self.buf.extend_from_slice(load);
            input = rest;
            self.nb_stripes_so_far = consume_stripes(
                &mut self.acc,
                self.nb_stripes_so_far,
                nb_stripes_per_block,
                &self.buf,
                INTERNAL_BUFFER_STRIPES,
                &self.secret,
                secret_limit,
                acc_width,
            );
            self.buf.clear();
        }

        // consume input by full buffer quantities
        let mut chunks = input.chunks_exact(INTERNAL_BUFFER_SIZE);

        for chunk in &mut chunks {
            self.nb_stripes_so_far = consume_stripes(
                &mut self.acc,
                self.nb_stripes_so_far,
                nb_stripes_per_block,
                chunk,
                INTERNAL_BUFFER_STRIPES,
                &self.secret,
                secret_limit,
                acc_width,
            );
        }

        // some remaining input data : buffer it
        self.buf.extend_from_slice(chunks.remainder())
    }

    /// Returns a copy of the accumulators with the buffered tail folded in,
    /// leaving `self` untouched so more data can still be ingested.
    ///
    /// NOTE(review): when fewer than `STRIPE_LEN` bytes are buffered, the
    /// missing leading bytes of the last stripe are read through a raw
    /// pointer from the end of the buffer allocation, past `buf.len()`.
    /// That is only meaningful if those bytes still hold the tail of the
    /// previously consumed buffer and the capacity is unchanged. `update`
    /// also consumes full chunks straight from `input` without copying them
    /// into `buf`, and `Clone` goes through `Vec::clone`; confirm that
    /// neither path can reach this branch.
    #[inline(always)]
    fn digest_long(&self, acc_width: AccWidth) -> Acc {
        let mut acc = self.acc.clone();
        let secret_limit = self.secret_limit();

        if self.buf.len() >= STRIPE_LEN {
            // digest locally, state remains unaltered, and can continue ingesting more data afterwards
            let total_nb_stripes = self.buf.len() / STRIPE_LEN;
            let _nb_stripes_so_far = consume_stripes(
                &mut acc,
                self.nb_stripes_so_far,
                self.nb_stripes_per_block(),
                &self.buf,
                total_nb_stripes,
                &self.secret,
                secret_limit,
                acc_width,
            );
            if (self.buf.len() % STRIPE_LEN) != 0 {
                unsafe {
                    accumulate512(
                        &mut acc,
                        &self.buf[self.buf.len() - STRIPE_LEN..],
                        &self.secret[secret_limit - SECRET_LASTACC_START..],
                        acc_width,
                    );
                }
            }
        } else if !self.buf.is_empty() {
            // one last stripe
            let mut last_stripe = [0u8; STRIPE_LEN];
            let catchup_size = STRIPE_LEN - self.buf.len();

            last_stripe[..catchup_size].copy_from_slice(unsafe {
                slice::from_raw_parts(
                    self.buf.as_ptr().add(self.buf.capacity() - catchup_size),
                    catchup_size,
                )
            });
            last_stripe[catchup_size..].copy_from_slice(&self.buf);

            unsafe {
                accumulate512(
                    &mut acc,
                    &last_stripe[..],
                    &self.secret[secret_limit - SECRET_LASTACC_START..],
                    acc_width,
                );
            }
        }

        acc
    }

    /// Produces the 64-bit hash of everything ingested so far.
    ///
    /// Inputs longer than `MIDSIZE_MAX` use the streamed accumulators;
    /// shorter inputs are still entirely in `buf` and are hashed with the
    /// one-shot functions.
    #[inline(always)]
    fn digest64(&self) -> u64 {
        if self.total_len > MIDSIZE_MAX {
            let acc = self.digest_long(AccWidth::Acc64Bits);

            merge_accs(
                &acc,
                &self.secret[SECRET_MERGEACCS_START..],
                (self.total_len as u64).wrapping_mul(PRIME64_1),
            )
        } else if self.seed != 0 {
            hash64_with_seed(&self.buf, self.seed)
        } else {
            hash64_with_secret(&self.buf, &self.secret[..self.secret_limit() + STRIPE_LEN])
        }
    }

    /// Produces the 128-bit hash of everything ingested so far.
    ///
    /// Mirrors `digest64`; for long inputs the low and high halves are two
    /// merges of the same accumulators with different secret offsets and
    /// start values.
    #[inline(always)]
    fn digest128(&self) -> u128 {
        let secret_limit = self.secret_limit();

        if self.total_len > MIDSIZE_MAX {
            let acc = self.digest_long(AccWidth::Acc128Bits);

            debug_assert!(secret_limit + STRIPE_LEN >= ACC_SIZE + SECRET_MERGEACCS_START);

            let total_len = self.total_len as u64;

            let low64 = merge_accs(
                &acc,
                &self.secret[SECRET_MERGEACCS_START..],
                total_len.wrapping_mul(PRIME64_1),
            );
            let high64 = merge_accs(
                &acc,
                &self.secret[secret_limit + STRIPE_LEN - ACC_SIZE - SECRET_MERGEACCS_START..],
                !total_len.wrapping_mul(PRIME64_2),
            );

            u128::from(low64) + (u128::from(high64) << 64)
        } else if self.seed != 0 {
            hash128_with_seed(&self.buf, self.seed)
        } else {
            hash128_with_secret(&self.buf, &self.secret[..secret_limit + STRIPE_LEN])
        }
    }
}

/// Accumulates `total_stripes` stripes of `data`, scrambling the
/// accumulators whenever a block of `nb_stripes_per_block` stripes is
/// completed, and returns the updated count of stripes in the current block.
///
/// `secret_limit` is the offset of the scramble key within `secret`.
#[inline(always)]
#[allow(clippy::too_many_arguments)]
fn consume_stripes(
    acc: &mut [u64],
    nb_stripes_so_far: usize,
    nb_stripes_per_block: usize,
    data: &[u8],
    total_stripes: usize,
    secret: &[u8],
    secret_limit: usize,
    acc_width: AccWidth,
) -> usize {
    debug_assert!(nb_stripes_so_far < nb_stripes_per_block);

    if nb_stripes_per_block - nb_stripes_so_far <= total_stripes {
        // the block boundary falls inside this batch: finish the block,
        // scramble, then start the next block from the top of the secret
        let nb_stripes = nb_stripes_per_block - nb_stripes_so_far;

        accumulate(
            acc,
            data,
            &secret[nb_stripes_so_far * SECRET_CONSUME_RATE..],
            nb_stripes,
            acc_width,
        );
        unsafe {
            scramble_acc(acc, &secret[secret_limit..]);
        }
        accumulate(
            acc,
            &data[nb_stripes * STRIPE_LEN..],
            secret,
            total_stripes - nb_stripes,
            acc_width,
        );

        total_stripes - nb_stripes
    } else {
        accumulate(
            acc,
            data,
            &secret[nb_stripes_so_far * SECRET_CONSUME_RATE..],
            total_stripes,
            acc_width,
        );

        nb_stripes_so_far + total_stripes
    }
}

/* ==========================================
 * XXH3 128 bits (=> XXH128)
 * ========================================== */

/// Dispatches inputs of at most 16 bytes to the matching short-input
/// 128-bit routine; an empty input hashes to 0.
#[inline(always)]
fn hash_len_0to16_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 {
    debug_assert!(len <= 16);

    if len > 8 {
        hash_len_9to16_128bits(data, len, secret, seed)
    } else if len >= 4 {
        hash_len_4to8_128bits(data, len, secret, seed)
    } else if len > 0 {
        hash_len_1to3_128bits(data, len, secret, seed)
    } else {
        0
    }
}

/// 128-bit hash of a 1 to 3 byte input: packs first, middle and last byte
/// plus the length into 32 bits, keys that value and its byte-swapped
/// form, then multiplies and avalanches each half.
#[inline(always)]
fn hash_len_1to3_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 {
    debug_assert!((1..=3).contains(&len));

    let c1 = u32::from(data[0]);
    let c2 = u32::from(data[len >> 1]);
    let c3 = u32::from(data[len - 1]);
    let combinedl = c1 + (c2 << 8) + (c3 << 16) + ((len as u32) << 24);
    let combinedh = combinedl.swap_bytes();
    let keyedl = u64::from(combinedl) ^ u64::from(key.read_u32_le()).wrapping_add(seed);
    let keyedh = u64::from(combinedh) ^ u64::from(key[4..].read_u32_le()).wrapping_sub(seed);
    let mixedl = keyedl.wrapping_mul(PRIME64_1);
    let mixedh = keyedh.wrapping_mul(PRIME64_2);

    u128::from(avalanche(mixedl)) + (u128::from(avalanche(mixedh)) << 64)
}

/// 128-bit hash of a 4 to 8 byte input: combines the first and last four
/// bytes into one 64-bit word, keys it and its byte-swapped form, and
/// mixes each half separately.
#[inline(always)]
fn hash_len_4to8_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 {
    debug_assert!((4..=8).contains(&len));

    let in1 = u64::from(data.read_u32_le());
    let in2 = u64::from(data[len - 4..].read_u32_le());
    let in64l = in1.wrapping_add(in2 << 32);
    let in64h = in64l.swap_bytes();
    let keyedl = in64l ^ key.read_u64_le().wrapping_add(seed);
    let keyedh = in64h ^ key[8..].read_u64_le().wrapping_sub(seed);
    let mix64l1 =
        (len as u64).wrapping_add((keyedl ^ (keyedl >> 51)).wrapping_mul(u64::from(PRIME32_1)));
    let mix64l2 = (mix64l1 ^ (mix64l1 >> 47)).wrapping_mul(PRIME64_2);
    let mix64h1 = (keyedh ^ (keyedh >> 47))
        .wrapping_mul(PRIME64_1)
        .wrapping_sub(len as u64);
    let mix64h2 = (mix64h1 ^ (mix64h1 >> 43)).wrapping_mul(PRIME64_4);

    u128::from(avalanche(mix64l2)) + (u128::from(avalanche(mix64h2)) << 64)
}

/// 128-bit hash of a 9 to 16 byte input, built from the keyed first and
/// last eight bytes and two rounds of 64x64 -> 128 multiplication.
#[inline(always)]
fn hash_len_9to16_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 {
    debug_assert!((9..=16).contains(&len));

    let ll1 = data.read_u64_le() ^ key.read_u64_le().wrapping_add(seed);
    let ll2 = data[len - 8..].read_u64_le() ^ key[8..].read_u64_le().wrapping_sub(seed);
    let inlow = ll1 ^ ll2;

    let m128 = u128::from(inlow).wrapping_mul(u128::from(PRIME64_1));
    let high64 = ((m128 >> 64) as u64).wrapping_add(ll2.wrapping_mul(PRIME64_1));
    let low64 = (m128 as u64) ^ (high64 >> 32);

    let h128 = u128::from(low64).wrapping_mul(u128::from(PRIME64_2));
    let high64 = ((h128 >> 64) as u64).wrapping_add(high64.wrapping_mul(PRIME64_2));
    let low64 = h128 as u64;

    u128::from(avalanche(low64)) + (u128::from(avalanche(high64)) << 64)
}

/// 128-bit hash of a 17 to 128 byte input. 16-byte lanes taken from the
/// front of the input feed `acc1`, lanes taken from the back feed `acc2`;
/// more lanes are used as the length crosses 32, 64 and 96 bytes.
#[inline(always)]
fn hash_len_17to128_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 {
    debug_assert!((17..=128).contains(&len));
    debug_assert!(secret.len() >= SECRET_SIZE_MIN);

    let mut acc1 = PRIME64_1.wrapping_mul(len as u64);
    let mut acc2 = 0u64;

    if len > 32 {
        if len > 64 {
            if len > 96 {
                acc1 = acc1.wrapping_add(mix_16bytes(&data[48..], &secret[96..], seed));
                acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 64..], &secret[112..], seed));
            }
            acc1 = acc1.wrapping_add(mix_16bytes(&data[32..], &secret[64..], seed));
            acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 48..], &secret[80..], seed));
        }

        acc1 = acc1.wrapping_add(mix_16bytes(&data[16..], &secret[32..], seed));
        acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 32..], &secret[48..], seed));
    }

    acc1 = acc1.wrapping_add(mix_16bytes(data, secret, seed));
    acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 16..], &secret[16..], seed));

    let low64 = acc1.wrapping_add(acc2);
    let high64 = acc1
        .wrapping_mul(PRIME64_1)
        .wrapping_add(acc2.wrapping_mul(PRIME64_4))
        .wrapping_add((len as u64).wrapping_sub(seed).wrapping_mul(PRIME64_2));

    u128::from(avalanche(low64)) + (u128::from(0u64.wrapping_sub(avalanche(high64))) << 64)
}

/// 128-bit hash of a 129 to `MIDSIZE_MAX` byte input. Each 32-byte group
/// contributes its first 16 bytes to `acc1` (keyed with `seed`) and its
/// second 16 bytes to `acc2` (keyed with the negated seed); the first four
/// groups are avalanched before the remaining ones are added.
#[inline(always)]
fn hash_len_129to240_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 {
    debug_assert!((129..=MIDSIZE_MAX).contains(&len));
    debug_assert!(secret.len() >= SECRET_SIZE_MIN);

    let acc1 = (len as u64).wrapping_mul(PRIME64_1);
    let acc2 = 0u64;

    let (acc1, acc2) = (0..4).fold((acc1, acc2), |(acc1, acc2), i| {
        (
            acc1.wrapping_add(mix_16bytes(&data[32 * i..], &secret[32 * i..], seed)),
            acc2.wrapping_add(mix_16bytes(
                &data[32 * i + 16..],
                &secret[32 * i + 16..],
                0u64.wrapping_sub(seed),
            )),
        )
    });
    let acc1 = avalanche(acc1);
    let acc2 = avalanche(acc2);

    let nb_rounds = len / 32;
    debug_assert!(nb_rounds >= 4);

    let (acc1, acc2) = (4..nb_rounds).fold((acc1, acc2), |(acc1, acc2), i| {
        (
            acc1.wrapping_add(mix_16bytes(
                &data[32 * i..],
                &secret[32 * (i - 4) + MIDSIZE_STARTOFFSET..],
                seed,
            )),
            acc2.wrapping_add(mix_16bytes(
                &data[32 * i + 16..],
                &secret[32 * (i - 4) + 16 + MIDSIZE_STARTOFFSET..],
                0u64.wrapping_sub(seed),
            )),
        )
    });

    // last bytes
    let acc1 = acc1.wrapping_add(mix_16bytes(
        &data[len - 16..],
        &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET..],
        seed,
    ));
    let acc2 = acc2.wrapping_add(mix_16bytes(
        &data[len - 32..],
        &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET - 16..],
        0u64.wrapping_sub(seed),
    ));

    let low64 = acc1.wrapping_add(acc2);
    let high64 = acc1
        .wrapping_mul(PRIME64_1)
        .wrapping_add(acc2.wrapping_mul(PRIME64_4))
        .wrapping_add((len as u64).wrapping_sub(seed).wrapping_mul(PRIME64_2));

    u128::from(avalanche(low64)) + (u128::from(0u64.wrapping_sub(avalanche(high64))) << 64)
}

/// Long-input 128-bit hash using the built-in `SECRET`.
#[inline]
fn hash_long_128bits_with_default_secret(data: &[u8], len: usize) -> u128 {
    hash_long_128bits_internal(data, len, &SECRET)
}

/// Long-input 128-bit hash using a caller-supplied secret.
#[inline]
fn hash_long_128bits_with_secret(data: &[u8], len: usize, secret: &[u8]) -> u128 {
    hash_long_128bits_internal(data, len, secret)
}

/// Long-input 128-bit hash with a seed: a zero seed uses the default
/// secret, any other seed derives a secret via `Secret::with_seed`.
#[inline]
fn hash_long_128bits_with_seed(data: &[u8], len: usize, seed: u64) -> u128 {
    if seed == 0 {
        hash_long_128bits_with_default_secret(data, len)
    } else {
        let secret = Secret::with_seed(seed);

        hash_long_128bits_internal(data, len, &secret)
    }
}

/// Runs the long-input accumulation loop in 128-bit mode, then merges the
/// accumulators twice (different secret offsets and start values) to form
/// the low and high 64-bit halves.
#[inline(always)]
fn hash_long_128bits_internal(data: &[u8], len: usize, secret: &[u8]) -> u128 {
    let mut acc = Acc::default();

    hash_long_internal_loop(&mut acc, data, len, secret, AccWidth::Acc128Bits);

    debug_assert!(secret.len() >= acc.len() + SECRET_MERGEACCS_START);

    let low64 = merge_accs(
        &acc,
        &secret[SECRET_MERGEACCS_START..],
        (len as u64).wrapping_mul(PRIME64_1),
    );
    let high64 = merge_accs(
        &acc,
        &secret[secret.len() - ACC_SIZE - SECRET_MERGEACCS_START..],
        !(len as u64).wrapping_mul(PRIME64_2),
    );

    u128::from(low64) + (u128::from(high64) << 64)
}

/* === XXH3 128-bit streaming === */

/* all the functions are actually the same as for 64-bit streaming variant,
just the reset one is different (different initial acc values for 0,5,6,7),
and near the end of the digest function */

#[cfg(test)]
mod tests {
    use alloc::vec;

    use super::*;

    const PRIME: u64 = 2654435761;
    const PRIME64: u64 = 11400714785074694797;
    const SANITY_BUFFER_SIZE: usize = 2243;

    /// Deterministic pseudo-random test input: the top byte of a value
    /// that is repeatedly multiplied by `PRIME64`, starting from `PRIME`.
    fn sanity_buffer() -> [u8; SANITY_BUFFER_SIZE] {
        let mut buf = [0; SANITY_BUFFER_SIZE];
        let mut byte_gen: u64 = PRIME;

        for b in buf.iter_mut() {
            *b = (byte_gen >> 56) as u8;
            byte_gen = byte_gen.wrapping_mul(PRIME64);
        }

        buf
    }

    #[test]
    fn hash_64bits_sanity_check() {
        let buf = sanity_buffer();

        let test_cases = vec![
            (&[][..], 0, 0), /* zero-length hash is always 0 */
            (&[][..], PRIME64, 0),
            (&buf[..1], 0, 0x7198D737CFE7F386),       /* 1 - 3 */
            (&buf[..1], PRIME64, 0xB70252DB7161C2BD), /* 1 - 3 */
            (&buf[..6], 0, 0x22CBF5F3E1F6257C),       /* 4 - 8 */
            (&buf[..6], PRIME64, 0x6398631C12AB94CE), /* 4 - 8 */
            (&buf[..12], 0, 0xD5361CCEEBB5A0CC),      /* 9 - 16 */
            (&buf[..12], PRIME64, 0xC4C125E75A808C3D), /* 9 - 16 */
            (&buf[..24], 0, 0x46796F3F78B20F6B),      /* 17 - 32 */
            (&buf[..24], PRIME64, 0x60171A7CD0A44C10), /* 17 - 32 */
            (&buf[..48], 0, 0xD8D4D3590D136E11),      /* 33 - 64 */
            (&buf[..48], PRIME64, 0x05441F2AEC2A1296), /* 33 - 64 */
            (&buf[..80], 0, 0xA1DC8ADB3145B86A),      /* 65 - 96 */
            (&buf[..80], PRIME64, 0xC9D55256965B7093), /* 65 - 96 */
            (&buf[..112], 0, 0xE43E5717A61D3759),     /* 97 -128 */
            (&buf[..112], PRIME64, 0x5A5F89A3FECE44A5), /* 97 -128 */
            (&buf[..195], 0, 0x6F747739CBAC22A5),     /* 129-240 */
            (&buf[..195], PRIME64, 0x33368E23C7F95810), /* 129-240 */
            (&buf[..403], 0, 0x4834389B15D981E8), /* one block, last stripe is overlapping */
            (&buf[..403], PRIME64, 0x85CE5DFFC7B07C87), /* one block, last stripe is overlapping */
            (&buf[..512], 0, 0x6A1B982631F059A8), /* one block, finishing at stripe boundary */
            (&buf[..512], PRIME64, 0x10086868CF0ADC99), /* one block, finishing at stripe boundary */
            (&buf[..2048], 0, 0xEFEFD4449323CDD4), /* 2 blocks, finishing at block boundary */
            (&buf[..2048], PRIME64, 0x01C85E405ECA3F6E), /* 2 blocks, finishing at block boundary */
            (&buf[..2240], 0, 0x998C0437486672C7), /* 3 blocks, finishing at stripe boundary */
            (&buf[..2240], PRIME64, 0x4ED38056B87ABC7F), /* 3 blocks, finishing at stripe boundary */
            (&buf[..2243], 0, 0xA559D20581D742D3), /* 3 blocks, last stripe is overlapping */
            (&buf[..2243], PRIME64, 0x96E051AB57F21FC8), /* 3 blocks, last stripe is overlapping */
        ];

        for (buf, seed, result) in test_cases {
            // one-shot API
            {
                let hash = hash64_with_seed(buf, seed);

                assert_eq!(
                    hash,
                    result,
                    "hash64_with_seed(&buf[..{}], seed={}) failed, got 0x{:X}, expected 0x{:X}",
                    buf.len(),
                    seed,
                    hash,
                    result
                );
            }

            // streaming API test

            // single ingestion
            {
                let mut hasher = Hash64::with_seed(seed);
                hasher.write(buf);
                let hash = hasher.finish();

                assert_eq!(
                    hash,
                    result,
                    "Hash64::update(&buf[..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}",
                    buf.len(),
                    seed,
                    hash,
                    result
                );
            }

            if buf.len() > 3 {
                // 2 ingestions
                let mut hasher = Hash64::with_seed(seed);
                hasher.write(&buf[..3]);
                hasher.write(&buf[3..]);
                let hash = hasher.finish();

                assert_eq!(
                    hash,
                    result,
                    "Hash64::update(&buf[..3], &buf[3..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}",
                    buf.len(),
                    seed,
                    hash,
                    result
                );
            }

            // byte by byte ingestion
            {
                let mut hasher = Hash64::with_seed(seed);

                for chunk in buf.chunks(1) {
                    hasher.write(chunk);
                }

                let hash = hasher.finish();

                assert_eq!(
                    hash,
                    result,
                    "Hash64::update(&buf[..{}].chunks(1)) with seed={} failed, got 0x{:X}, expected 0x{:X}",
                    buf.len(),
                    seed,
                    hash,
                    result
                );
            }
        }
    }

    #[test]
    fn hash_64bits_with_secret_sanity_check() {
        let buf = sanity_buffer();
        let secret = &buf[7..7 + SECRET_SIZE_MIN + 11];

        let test_cases = vec![
            (&[][..], secret, 0),                       /* zero-length hash is always 0 */
            (&buf[..1], secret, 0x7F69735D618DB3F0),    /* 1 - 3 */
            (&buf[..6], secret, 0xBFCC7CB1B3554DCE),    /* 4 - 8 */
            (&buf[..12], secret, 0x8C50DC90AC9206FC),   /* 9 - 16 */
            (&buf[..24], secret, 0x1CD2C2EE9B9A0928),   /* 17 - 32 */
            (&buf[..48], secret, 0xA785256D9D65D514),   /* 33 - 64 */
            (&buf[..80], secret, 0x6F3053360D21BBB7),   /* 65 - 96 */
            (&buf[..112], secret, 0x560E82D25684154C),  /* 97 -128 */
            (&buf[..195], secret, 0xBA5BDDBC5A767B11),  /* 129-240 */
            (&buf[..403], secret, 0xFC3911BBA656DB58), /* one block, last stripe is overlapping */
            (&buf[..512], secret, 0x306137DD875741F1), /* one block, finishing at stripe boundary */
            (&buf[..2048], secret, 0x2836B83880AD3C0C), /* > one block, at least one scrambling */
            (&buf[..2243], secret, 0x3446E248A00CB44A), /* > one block, at least one scrambling, last stripe unaligned */
        ];

        for (buf, secret, result) in test_cases {
            // one-shot API
            {
                let hash = hash64_with_secret(buf, secret);

                assert_eq!(
                    hash,
                    result,
                    "hash64_with_secret(&buf[..{}], secret) failed, got 0x{:X}, expected 0x{:X}",
                    buf.len(),
                    hash,
                    result
                );
            }

            // streaming API test

            // single ingestion
            {
                let mut hasher = Hash64::with_secret(secret);
                hasher.write(buf);
                let hash = hasher.finish();

                assert_eq!(
                    hash,
                    result,
                    "Hash64::update(&buf[..{}]) with secret failed, got 0x{:X}, expected 0x{:X}",
                    buf.len(),
                    hash,
                    result
                );
            }

            // byte by byte ingestion
            {
                let mut hasher = Hash64::with_secret(secret);

                for chunk in buf.chunks(1) {
                    hasher.write(chunk);
                }

                let hash = hasher.finish();

                assert_eq!(
                    hash,
                    result,
                    "Hash64::update(&buf[..{}].chunks(1)) with secret failed, got 0x{:X}, expected 0x{:X}",
                    buf.len(),
                    hash,
                    result
                );
            }
        }
    }

    #[test]
    fn hash_128bits_sanity_check() {
        let buf = sanity_buffer();

        let test_cases = vec![
            (&[][..], 0, 0u64, 0u64), /* zero-length hash is always 0 */
            (&[][..], PRIME, 0, 0),
            (&buf[..1], 0, 0x7198D737CFE7F386, 0x3EE70EA338F3F1E8), /* 1-3 */
            (&buf[..1], PRIME, 0x8E05996EC27C0F46, 0x90DFC659A8BDCC0C), /* 1-3 */
            (&buf[..6], 0, 0x22CBF5F3E1F6257C, 0xD4E6C2B94FFC3BFA), /* 4-8 */
            (&buf[..6], PRIME, 0x97B28D3079F8541F, 0xEFC0B954298E6555), /* 4-8 */
            (&buf[..12], 0, 0x0E0CD01F05AC2F0D, 0x2B55C95951070D4B), /* 9-16 */
            (&buf[..12], PRIME, 0xA9DE561CA04CDF37, 0x609E31FDC00A43C9), /* 9-16 */
            (&buf[..24], 0, 0x46796F3F78B20F6B, 0x58FF55C3926C13FA), /* 17-32 */
            (&buf[..24], PRIME, 0x30D5C4E9EB415C55, 0x8868344B3A4645D0), /* 17-32 */
            (&buf[..48], 0, 0xD8D4D3590D136E11, 0x5527A42843020A62), /* 33-64 */
            (&buf[..48], PRIME, 0x1D8834E1A5407A1C, 0x44375B9FB060F541), /* 33-64 */
            (&buf[..81], 0, 0x4B9B448ED8DFD3DD, 0xE805A6D1A43D70E5), /* 65-96 */
            (&buf[..81], PRIME, 0xD2D6B075945617BA, 0xE58BE5736F6E7550), /* 65-96 */
            (&buf[..103], 0, 0xC5A9F97B29EFA44E, 0x254DB7BE881E125C), /* 97-128 */
            (&buf[..103], PRIME, 0xFA2086367CDB177F, 0x0AEDEA68C988B0C0), /* 97-128 */
            (&buf[..192], 0, 0xC3142FDDD9102A3F, 0x06F1747E77185F97), /* 129-240 */
            (&buf[..192], PRIME, 0xA89F07B35987540F, 0xCF1B35FB2C557F54), /* 129-240 */
            (&buf[..222], 0, 0xA61AC4EB3295F86B, 0x33FA7B7598C28A07), /* 129-240 */
            (&buf[..222], PRIME, 0x54135EB88AD8B75E, 0xBC45CE6AE50BCF53), /* 129-240 */
            (&buf[..403], 0, 0xB0C48E6D18E9D084, 0xB16FC17E992FF45D), /* one block, last stripe is overlapping */
            (&buf[..403], PRIME64, 0x0A1D320C9520871D, 0xCE11CB376EC93252), /* one block, last stripe is overlapping */
            (&buf[..512], 0, 0xA03428558AC97327, 0x4ECF51281BA406F7), /* one block, finishing at stripe boundary */
            (&buf[..512], PRIME64, 0xAF67A482D6C893F2, 0x1382D92F25B84D90), /* one block, finishing at stripe boundary */
            (&buf[..2048], 0, 0x21901B416B3B9863, 0x212AF8E6326F01E0), /* two blocks, finishing at block boundary */
            (&buf[..2048], PRIME, 0xBDBB2282577DADEC, 0xF78CDDC2C9A9A692), /* two blocks, finishing at block boundary */
            (&buf[..2240], 0, 0x00AD52FA9385B6FE, 0xC705BAD3356CE302), /* two blocks, ends at stripe boundary */
            (&buf[..2240], PRIME, 0x10FD0072EC68BFAA, 0xE1312F3458817F15), /* two blocks, ends at stripe boundary */
            (&buf[..2237], 0, 0x970C91411533862C, 0x4BBD06FF7BFF0AB1), /* two blocks, last stripe is overlapping */
            (&buf[..2237], PRIME, 0xD80282846D814431, 0x14EBB157B84D9785), /* two blocks, last stripe is overlapping */
        ];

        for (buf, seed, lo, hi) in test_cases {
            let result = u128::from(lo) + (u128::from(hi) << 64);

            // one-shot API
            {
                let hash = hash128_with_seed(buf, seed);

                assert_eq!(
                    hash,
                    result,
                    "hash128_with_seed(&buf[..{}], seed={}) failed, got 0x{:X}, expected 0x{:X}",
                    buf.len(),
                    seed,
                    hash,
                    result
                );
            }

            // streaming API test

            // single ingestion
            {
                let mut hasher = Hash128::with_seed(seed);
                hasher.write(buf);
                let hash = hasher.finish_ext();

                assert_eq!(
                    hash,
                    result,
                    "Hash128::update(&buf[..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}",
                    buf.len(),
                    seed,
                    hash,
                    result
                );
            }

            if buf.len() > 3 {
                // 2 ingestions
                let mut hasher = Hash128::with_seed(seed);
                hasher.write(&buf[..3]);
                hasher.write(&buf[3..]);
                let hash = hasher.finish_ext();

                assert_eq!(
                    hash,
                    result,
                    "Hash128::update(&buf[..3], &buf[3..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}",
                    buf.len(),
                    seed,
                    hash,
                    result
                );
            }

            // byte by byte ingestion
            {
                let mut hasher = Hash128::with_seed(seed);

                for chunk in buf.chunks(1) {
                    hasher.write(chunk);
                }

                let hash = hasher.finish_ext();

                assert_eq!(
                    hash,
                    result,
                    "Hash128::update(&buf[..{}].chunks(1)) with seed={} failed, got 0x{:X}, expected 0x{:X}",
                    buf.len(),
                    seed,
                    hash,
                    result
                );
            }
        }
    }
}