foldhash-0.1.4/.cargo_vcs_info.json0000644000000001360000000000100126260ustar { "git": { "sha1": "f2f514c17eeec8e06277836bc2b49c9ca2bc24ce" }, "path_in_vcs": "" }foldhash-0.1.4/.gitignore000064400000000000000000000000611046102023000134030ustar 00000000000000/target /Cargo.lock /bench_results /out .DS_Storefoldhash-0.1.4/Cargo.toml0000644000000026740000000000100106350ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "foldhash" version = "0.1.4" authors = ["Orson Peters "] build = false exclude = [ "benches", "tools", "assets", ] autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "A fast, non-cryptographic, minimally DoS-resistant hashing algorithm." readme = "README.md" keywords = [ "hash", "hasher", "no-std", ] categories = [ "algorithms", "no-std", ] license = "Zlib" repository = "https://github.com/orlp/foldhash" [profile.release] lto = "thin" [lib] name = "foldhash" path = "src/lib.rs" bench = false [dependencies] [dev-dependencies.ahash] version = "0.8" [dev-dependencies.chrono] version = "0.4" [dev-dependencies.criterion] version = "0.5" [dev-dependencies.fxhash] version = "0.2" [dev-dependencies.hashbrown] version = "0.14" [dev-dependencies.rand] version = "0.8" [dev-dependencies.uuid] version = "1.8" [features] default = ["std"] std = [] foldhash-0.1.4/Cargo.toml.orig000064400000000000000000000014171046102023000143100ustar 00000000000000[package] name = "foldhash" version = "0.1.4" authors = ["Orson Peters "] license = "Zlib" repository = "https://github.com/orlp/foldhash" readme = "README.md" keywords = ["hash", "hasher", "no-std"] categories = ["algorithms", "no-std"] description = "A fast, non-cryptographic, minimally DoS-resistant hashing algorithm." edition = "2021" exclude = ["benches", "tools", "assets"] [features] default = ["std"] std = [] [dependencies] [dev-dependencies] criterion = "0.5" hashbrown = "0.14" uuid = "1.8" rand = "0.8" ahash = "0.8" fxhash = "0.2" chrono = "0.4" [lib] bench = false [[bench]] name = "bench" path = "benches/bench.rs" harness = false [[bench]] name = "avalanche" path = "benches/avalanche.rs" harness = false [profile.release] lto = "thin"foldhash-0.1.4/LICENSE000064400000000000000000000015301046102023000124220ustar 00000000000000Copyright (c) 2024 Orson Peters This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. 
This notice may not be removed or altered from any source distribution.foldhash-0.1.4/README.md000064400000000000000000000431251046102023000127020ustar 00000000000000# Foldhash This repository contains foldhash, a fast, non-cryptographic, minimally DoS-resistant hashing algorithm implemented in Rust designed for computational uses such as hash maps, bloom filters, count sketching, etc. When should you **not** use foldhash: - You are afraid of people studying your long-running program's behavior to reverse engineer its internal random state and using this knowledge to create many colliding inputs for computational complexity attacks. For more details see the section "HashDoS resistance". - You expect foldhash to have a consistent output across versions or platforms, such as for persistent file formats or communication protocols. - You are relying on foldhash's properties for any kind of security. Foldhash is **not appropriate for any cryptographic purpose**. Foldhash has two variants, one optimized for speed which is ideal for data structures such as hash maps and bloom filters, and one optimized for statistical quality which is ideal for algorithms such as [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) and [MinHash](https://en.wikipedia.org/wiki/MinHash). Foldhash can be used in a `#![no_std]` environment by disabling its default `"std"` feature. ## Performance We evaluated foldhash against three commonly used hashes in Rust: [aHash](https://github.com/tkaitchuck/aHash) v0.8.11, [fxhash](https://github.com/cbreeden/fxhash) v0.2.1, and [SipHash-1-3](https://en.wikipedia.org/wiki/SipHash), the default hash algorithm in Rust at the time of writing. We evaluated both variants foldhash provides, `foldhash-f` and `foldhash-q`, which correspond to `foldhash::fast` and `foldhash::quality` in the crate respectively. First we note that hashers with random state inflate the size of your `HashMap`, which may or may not be important for your performance: ```rust std::mem::size_of::>() = 40 // (both variants) std::mem::size_of::>() = 64 std::mem::size_of::>() = 32 std::mem::size_of::>() = 48 ``` We tested runtime performance on two machines, one with a 2023 Apple M2 CPU, one with a 2023 Intel Xeon Platinum 8481C server CPU, both with stable Rust 1.80.1. Since one of our competitors (aHash) is reliant on AES-based instructions for optimal performance we have included both a benchmark with and without `-C target-cpu=native` for the Intel machine. 
We tested across a wide variety of data types we consider representative of types / distributions one might hash in the real world, in the context of a hash table key: - `u32` - random 32-bit unsigned integers, - `u32pair` - pairs of random 32-bit unsigned integers, - `u64` - random 64-bit unsigned integers, - `u64pair` - pairs of random 64-bit unsigned integers, - `u64lobits` - 64-bit unsigned integers where only the bottom 16 bits vary, - `u64hibits` - 64-bit unsigned integers where only the top 16 bits vary, - `ipv4` - [`std::net::Ipv4Addr`](https://doc.rust-lang.org/std/net/struct.Ipv4Addr.html), which is equivalent to `[u8; 4]`, - `ipv6` - [`std::net::Ipv6Addr`](https://doc.rust-lang.org/std/net/struct.Ipv6Addr.html), which is equivalent to `[u8; 16]`, - `rgba` - random `(u8, u8, u8, u8)` tuples, - `strenglishword` - strings containing words sampled uniformly from the top 10,000 most common English words, - `struuid` - random UUIDs, hashed in string representation, - `strurl` - strings containing URLs sampled uniformly from a corpus of 10,000 URLs, - `strdate` - random `YYYY-MM-DD` date strings, - `accesslog` - `(u128, u32, chrono::NaiveDate, bool)`, meant to simulate a typical larger compound type, in this case `(resource_id, user_id, date, success)` for an access log. - `kilobyte` - random bytestrings one kilobyte in length, - `tenkilobyte` - random bytestrings ten kilobytes in length. We tested the performance of hashing the above data types in the following four contexts: - `hashonly` - only the time it takes to hash the value, - `lookupmiss` - the time it takes to do a lookup in a 1,000 element hash map of random elements, only sampling keys of which we know that are not in the hash map, - `lookuphit` - similar to `lookupmiss`, except the keys are sampled from keys known to be in the hash map, - `setbuild` - the time it takes to construct a `HashSet` of 1,000 elements from 1,000 randomly sampled elements each repeated 10 times (so 10,000 inserts, with ~90% duplicates). All times are reported as expected time per operation, so one hash, one lookup, or one insert respectively. The full results [can be found here](https://gist.github.com/orlp/1271ad5b8b775c651cc55773888858eb). To summarize, we will only show the results for `u64` and `strenglishword` here, as well as the observed geometric mean and average rank over the full benchmark. ``` Xeon 8481c ┌────────────────┬────────────┬────────────┬────────────┬─────────┬─────────┬─────────┐ │ avg_rank ┆ 1.58 ┆ 2.66 ┆ 2.09 ┆ 3.70 ┆ 4.97 │ │ geometric_mean ┆ 6.21 ┆ 7.01 ┆ 7.56 ┆ 8.74 ┆ 28.70 │ ╞════════════════╪════════════╪════════════╪════════════╪═════════╪═════════╪═════════╡ │ distr ┆ bench ┆ foldhash-f ┆ foldhash-q ┆ fxhash ┆ ahash ┆ siphash │ ╞════════════════╪════════════╪════════════╪════════════╪═════════╪═════════╪═════════╡ │ u64 ┆ hashonly ┆ 0.79 ┆ 1.03 ┆ 0.67 ┆ 1.23 ┆ 9.09 │ │ u64 ┆ lookupmiss ┆ 2.01 ┆ 2.44 ┆ 1.73 ┆ 2.73 ┆ 12.03 │ │ u64 ┆ lookuphit ┆ 3.04 ┆ 3.59 ┆ 2.64 ┆ 3.84 ┆ 12.65 │ │ u64 ┆ setbuild ┆ 6.13 ┆ 6.52 ┆ 4.88 ┆ 6.66 ┆ 17.80 │ | ... ┆ ... ┆ ... ┆ ... ┆ ... ┆ ... ┆ ... 
| │ strenglishword ┆ hashonly ┆ 2.63 ┆ 2.98 ┆ 3.24 ┆ 3.57 ┆ 11.87 │ │ strenglishword ┆ lookupmiss ┆ 4.63 ┆ 5.03 ┆ 4.51 ┆ 5.86 ┆ 15.19 │ │ strenglishword ┆ lookuphit ┆ 8.62 ┆ 9.25 ┆ 8.28 ┆ 10.06 ┆ 21.35 │ │ strenglishword ┆ setbuild ┆ 14.77 ┆ 15.57 ┆ 18.86 ┆ 15.72 ┆ 35.36 │ └────────────────┴────────────┴────────────┴────────────┴─────────┴─────────┴─────────┘ Xeon 8481c with RUSTFLAGS="-C target-cpu=native" ┌────────────────┬────────────┬────────────┬────────────┬─────────┬─────────┬─────────┐ │ avg_rank ┆ 1.89 ┆ 3.12 ┆ 2.25 ┆ 2.77 ┆ 4.97 │ │ geometric_mean ┆ 6.00 ┆ 6.82 ┆ 7.39 ┆ 6.94 ┆ 29.49 │ ╞════════════════╪════════════╪════════════╪════════════╪═════════╪═════════╪═════════╡ │ distr ┆ bench ┆ foldhash-f ┆ foldhash-q ┆ fxhash ┆ ahash ┆ siphash │ ╞════════════════╪════════════╪════════════╪════════════╪═════════╪═════════╪═════════╡ │ u64 ┆ hashonly ┆ 0.79 ┆ 1.01 ┆ 0.67 ┆ 1.34 ┆ 9.24 │ │ u64 ┆ lookupmiss ┆ 1.68 ┆ 2.12 ┆ 1.62 ┆ 1.96 ┆ 12.04 │ │ u64 ┆ lookuphit ┆ 2.68 ┆ 3.19 ┆ 2.28 ┆ 3.16 ┆ 13.09 │ │ u64 ┆ setbuild ┆ 6.16 ┆ 6.42 ┆ 4.75 ┆ 7.03 ┆ 18.88 │ | ... ┆ ... ┆ ... ┆ ... ┆ ... ┆ ... ┆ ... | │ strenglishword ┆ hashonly ┆ 2.60 ┆ 2.97 ┆ 3.25 ┆ 3.04 ┆ 11.58 │ │ strenglishword ┆ lookupmiss ┆ 4.41 ┆ 4.96 ┆ 4.82 ┆ 4.79 ┆ 32.31 │ │ strenglishword ┆ lookuphit ┆ 8.68 ┆ 9.35 ┆ 8.46 ┆ 8.63 ┆ 21.48 │ │ strenglishword ┆ setbuild ┆ 15.01 ┆ 16.34 ┆ 19.34 ┆ 15.37 ┆ 35.22 │ └────────────────┴────────────┴────────────┴────────────┴─────────┴─────────┴─────────┘ Apple M2 ┌────────────────┬────────────┬────────────┬────────────┬─────────┬─────────┬─────────┐ │ avg_rank ┆ 1.62 ┆ 2.81 ┆ 2.02 ┆ 3.58 ┆ 4.97 │ │ geometric_mean ┆ 4.41 ┆ 4.86 ┆ 5.39 ┆ 5.71 ┆ 21.94 │ ╞════════════════╪════════════╪════════════╪════════════╪═════════╪═════════╪═════════╡ │ distr ┆ bench ┆ foldhash-f ┆ foldhash-q ┆ fxhash ┆ ahash ┆ siphash │ ╞════════════════╪════════════╪════════════╪════════════╪═════════╪═════════╪═════════╡ │ u64 ┆ hashonly ┆ 0.60 ┆ 0.70 ┆ 0.41 ┆ 0.78 ┆ 6.61 │ │ u64 ┆ lookupmiss ┆ 1.50 ┆ 1.61 ┆ 1.23 ┆ 1.65 ┆ 8.28 │ │ u64 ┆ lookuphit ┆ 1.78 ┆ 2.10 ┆ 1.57 ┆ 2.25 ┆ 8.53 │ │ u64 ┆ setbuild ┆ 4.74 ┆ 5.19 ┆ 3.61 ┆ 5.38 ┆ 15.36 │ | ... ┆ ... ┆ ... ┆ ... ┆ ... ┆ ... ┆ ... | │ strenglishword ┆ hashonly ┆ 1.84 ┆ 2.13 ┆ 1.85 ┆ 2.13 ┆ 11.61 │ │ strenglishword ┆ lookupmiss ┆ 2.71 ┆ 2.96 ┆ 2.47 ┆ 2.99 ┆ 9.27 │ │ strenglishword ┆ lookuphit ┆ 7.54 ┆ 8.77 ┆ 7.83 ┆ 8.77 ┆ 18.65 │ │ strenglishword ┆ setbuild ┆ 16.61 ┆ 17.09 ┆ 14.83 ┆ 16.52 ┆ 26.42 │ └────────────────┴────────────┴────────────┴────────────┴─────────┴─────────┴─────────┘ ``` We note from the above benchmark that for hash table performance the extra quality that `foldhash-q` provides is almost never actually worth the small but also non-negligible computational overhead it has over `foldhash-f`. This is our justification for providing `foldhash::fast` as a default choice for hash tables, even though it has measurable biases (see also the "Quality" section). fxhash generally does fairly well for small inputs on the benchmarks, however it has structural weaknesses as a hash which makes it ill-advised to use as a general-purpose hash function in our opinion. For example the `lookuphit` benchmark on Apple M2 for `u64hibits` takes 1.77 nanoseconds per lookup for foldhash, but 67.72 nanoseconds for fxhash (due to everything colliding - the effects would be even worse with a larger hash map). In our opinion foldhash-f strikes the right balance between quality and performance for hash tables, whereas fxhash flies a bit too close to the sun. 
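To make the fxhash failure mode concrete, here is a small standalone sketch (not part of the crate or its benchmark suite; it assumes `foldhash` and `fxhash` are available as dependencies). It hashes 1,000 keys that differ only in their top 16 bits and counts how many distinct bucket indices remain after masking the hash down to 10 bits, roughly what a 1,024-slot hash table does:

```rust
use std::collections::HashSet;
use std::hash::{BuildHasher, BuildHasherDefault};

// Count the distinct 10-bit bucket indices produced for 1,000 keys that
// differ only in their top 16 bits.
fn distinct_buckets<S: BuildHasher>(state: &S) -> usize {
    (0..1000u64)
        .map(|i| state.hash_one(i << 48) & 0x3ff)
        .collect::<HashSet<_>>()
        .len()
}

fn main() {
    let foldhash = foldhash::fast::RandomState::default();
    let fxhash = BuildHasherDefault::<fxhash::FxHasher>::default();
    // foldhash-f spreads the keys over most of the 1,024 buckets, while
    // fxhash collapses them onto almost none.
    println!("foldhash-f: {} distinct buckets", distinct_buckets(&foldhash));
    println!("fxhash:     {} distinct buckets", distinct_buckets(&fxhash));
}
```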
aHash is faster than foldhash for medium-long strings when compiled with AES instruction support, but is slower in almost every other scenario or when AES instructions are unavailable. ## Quality Foldhash-f is a fairly strong hash in terms of collisions *on its full 64-bit output*. However, statistical tests such as [SMHasher3](https://gitlab.com/fwojcik/smhasher3) can distinguish it from an ideal hash function in tests that focus on the relationship between individual input/output bits. One such property is avalanching: changing a single bit in the input does not flip every other bit with 50% probability when using foldhash-f like it should if it behaved like a proper random oracle. As the benchmarks above show, spending more effort in foldhash-f to improve the hash quality does not lead to better hash table performance. However, there are also use cases for hash functions where it is important that (each bit of) the hash is unbiased and a random function of all bits of the input, such as in algorithms as HyperLogLog or MinHash. For this purpose we also provide foldhash-q, which is simply a post-processed version of foldhash-f to properly avalanche all the bits. Foldhash-q passes the [SMHasher3](https://gitlab.com/fwojcik/smhasher3) test suite [without any failures](https://github.com/orlp/foldhash_smhasher3). You can also plot the worst-case probability (where 50% is ideal) that any particular output bit flips if you flip an input bit, which nicely visualizes how fxhash and foldhash-f fail this avalanche property but foldhash-q and SipHash-1-3 pass: | FxHash | Foldhash-f | Foldhash-q | SipHash-1-3 | |--------|------------|------------|-------------| | | | | ## Background The name foldhash is derived from the *folded multiply*. This technique compresses two 64-bit words into a single 64-bit word while simultaneously thoroughly mixing the bits. It does this using a 64 x 64 bit -> 128 bit multiplication followed by folding the two halves of the 128-bit product together using a XOR operation: ```rust let full = (x as u128) * (y as u128); let lo = full as u64; let hi = (full >> 64) as u64; let folded = lo ^ hi; ``` We're not aware of a formal analysis of this operation, but empirically it works very well. An informal intuition for why it should work is that multiplication can be seen as the sum of many shifted copies of one of the arguments, only including those shifted copies where the other argument has set bits, e.g. for multiplying 4-bit words `abcd` and `efgh`: ``` abcd * efgh = abcd * e abcd * f abcd * g abcd * h --------------- + ``` Note that the middle bits of the product are a function of many of the input bits, whereas the top-most and bottom-most bits are impacted by fewer of the input bits. By folding the top half back onto the bottom half these effects compensate each other, making all the output bits affected by much of the input. We did not invent the folded multiply, it was previously used in essentially the same way in [aHash](https://github.com/tkaitchuck/aHash), [wyhash](https://github.com/wangyi-fudan/wyhash), and [xxhash3](https://github.com/Cyan4973/xxHash). The operation was also used in [mum-hash](https://github.com/vnmakarov/mum-hash), and probably others. We do not know who originally invented it, the earliest reference we could find was Steven Fuerst [blogging about it](https://web.archive.org/web/20121213174842/http://locklessinc.com/articles/crypto_hash/) in 2012. 
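To see the folded multiply's mixing in action, here is a quick standalone snippet (not taken from the crate; the two sample constants are arbitrary). Flipping a single input bit, whether at the bottom, middle, or top, already changes many bits of the folded result:

```rust
fn folded_multiply(x: u64, y: u64) -> u64 {
    let full = (x as u128) * (y as u128);
    (full as u64) ^ ((full >> 64) as u64)
}

fn main() {
    let (x, y) = (0x243f6a8885a308d3u64, 0x13198a2e03707344u64);
    for bit in [0, 31, 63] {
        let changed = (folded_multiply(x, y) ^ folded_multiply(x ^ (1 << bit), y)).count_ones();
        println!("flipping input bit {bit} changes {changed} output bits");
    }
}
```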
## HashDoS resistance The folded multiply has a fairly glaring flaw: if one of the halves is zero, the output is zero. This makes it trivial to create a large number of hash collisions (even by accident, as zeroes are a common input to hashes). To combat this, every folded multiply in foldhash has the following form: ```rust folded_multiply(input1 ^ secret1, input2 ^ secret2) ``` Here `secret1` or `secret2` are either secret random numbers generated by foldhash beforehand, or partial hash results influenced by such a secret prior. This (plus other careful design throughout the hash function) ensures that it is not possible to create a list of inputs that collide for every instance of foldhash, and also prevents certain access patterns on hash tables going quadratric by ensuring that each hash table uses a different seed and thus a different access pattern. It is these two properties that we refer to when we claim foldhash is "minimally DoS-resistant": it does the bare minimum to defeat very simple attacks. However, to be crystal clear, **foldhash does not claim to provide HashDoS resistance against interactive attackers**. For a student of cryptography it should be trivial to derive the secret values from direct observation of hash outputs, and feasible to derive the secret values from indirect observation of hashes, such as through timing attacks or hash table iteration. Once an attacker knows the secret values, they can once again create infinite hash collisions with ease. foldhash-0.1.4/src/convenience.rs000064400000000000000000000040211046102023000150440ustar 00000000000000use super::fast::{FixedState, RandomState}; /// Type alias for [`std::collections::HashMap`]. pub type HashMap = std::collections::HashMap; /// Type alias for [`std::collections::HashSet`]. pub type HashSet = std::collections::HashSet; /// A convenience extension trait to enable [`HashMap::new`] for hash maps that use `foldhash`. pub trait HashMapExt { /// Creates an empty `HashMap`. fn new() -> Self; /// Creates an empty `HashMap` with at least the specified capacity. fn with_capacity(capacity: usize) -> Self; } impl HashMapExt for std::collections::HashMap { fn new() -> Self { Self::with_hasher(RandomState::default()) } fn with_capacity(capacity: usize) -> Self { Self::with_capacity_and_hasher(capacity, RandomState::default()) } } impl HashMapExt for std::collections::HashMap { fn new() -> Self { Self::with_hasher(FixedState::default()) } fn with_capacity(capacity: usize) -> Self { Self::with_capacity_and_hasher(capacity, FixedState::default()) } } /// A convenience extension trait to enable [`HashSet::new`] for hash sets that use `foldhash`. pub trait HashSetExt { /// Creates an empty `HashSet`. fn new() -> Self; /// Creates an empty `HashSet` with at least the specified capacity. fn with_capacity(capacity: usize) -> Self; } impl HashSetExt for std::collections::HashSet { fn new() -> Self { Self::with_hasher(RandomState::default()) } fn with_capacity(capacity: usize) -> Self { Self::with_capacity_and_hasher(capacity, RandomState::default()) } } impl HashSetExt for std::collections::HashSet { fn new() -> Self { Self::with_hasher(FixedState::default()) } fn with_capacity(capacity: usize) -> Self { Self::with_capacity_and_hasher(capacity, FixedState::default()) } } foldhash-0.1.4/src/lib.rs000064400000000000000000000330751046102023000133310ustar 00000000000000//! This crate provides foldhash, a fast, non-cryptographic, minimally //! DoS-resistant hashing algorithm designed for computational uses such as //! 
hashmaps, bloom filters, count sketching, etc. //! //! When should you **not** use foldhash: //! //! - You are afraid of people studying your long-running program's behavior //! to reverse engineer its internal random state and using this knowledge to //! create many colliding inputs for computational complexity attacks. //! //! - You expect foldhash to have a consistent output across versions or //! platforms, such as for persistent file formats or communication protocols. //! //! - You are relying on foldhash's properties for any kind of security. //! Foldhash is **not appropriate for any cryptographic purpose**. //! //! Foldhash has two variants, one optimized for speed which is ideal for data //! structures such as hash maps and bloom filters, and one optimized for //! statistical quality which is ideal for algorithms such as //! [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) and //! [MinHash](https://en.wikipedia.org/wiki/MinHash). //! //! Foldhash can be used in a `#![no_std]` environment by disabling its default //! `"std"` feature. //! //! # Usage //! //! The easiest way to use this crate with the standard library [`HashMap`] or //! [`HashSet`] is to import them from `foldhash` instead, along with the //! extension traits to make [`HashMap::new`] and [`HashMap::with_capacity`] //! work out-of-the-box: //! //! ```rust //! use foldhash::{HashMap, HashMapExt}; //! //! let mut hm = HashMap::new(); //! hm.insert(42, "hello"); //! ``` //! //! You can also avoid the convenience types and do it manually by initializing //! a [`RandomState`](fast::RandomState), for example if you are using a different hash map //! implementation like [`hashbrown`](https://docs.rs/hashbrown/): //! //! ```rust //! use hashbrown::HashMap; //! use foldhash::fast::RandomState; //! //! let mut hm = HashMap::with_hasher(RandomState::default()); //! hm.insert("foo", "bar"); //! ``` //! //! The above methods are the recommended way to use foldhash, which will //! automatically generate a randomly generated hasher instance for you. If you //! absolutely must have determinism you can use [`FixedState`](fast::FixedState) //! instead, but note that this makes you trivially vulnerable to HashDoS //! attacks and might lead to quadratic runtime when moving data from one //! hashmap/set into another: //! //! ```rust //! use std::collections::HashSet; //! use foldhash::fast::FixedState; //! //! let mut hm = HashSet::with_hasher(FixedState::with_seed(42)); //! hm.insert([1, 10, 100]); //! ``` //! //! If you rely on statistical properties of the hash for the correctness of //! your algorithm, such as in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog), //! it is suggested to use the [`RandomState`](quality::RandomState) //! or [`FixedState`](quality::FixedState) from the [`quality`] module instead //! of the [`fast`] module. The latter is optimized purely for speed in hash //! tables and has known statistical imperfections. //! //! Finally, you can also directly use the [`RandomState`](quality::RandomState) //! or [`FixedState`](quality::FixedState) to manually hash items using the //! [`BuildHasher`](std::hash::BuildHasher) trait: //! ```rust //! use std::hash::BuildHasher; //! use foldhash::quality::RandomState; //! //! let random_state = RandomState::default(); //! let hash = random_state.hash_one("hello world"); //! 
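//! // Hashing the same value again with the same `RandomState` instance
//! // yields the same result (illustrative assertion, not part of the
//! // original example):
//! assert_eq!(hash, random_state.hash_one("hello world"));
//!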
``` #![cfg_attr(all(not(test), not(feature = "std")), no_std)] #![warn(missing_docs)] use core::hash::Hasher; #[cfg(feature = "std")] mod convenience; mod seed; #[cfg(feature = "std")] pub use convenience::*; // Arbitrary constants with high entropy. Hexadecimal digits of pi were used. const ARBITRARY0: u64 = 0x243f6a8885a308d3; const ARBITRARY1: u64 = 0x13198a2e03707344; const ARBITRARY2: u64 = 0xa4093822299f31d0; const ARBITRARY3: u64 = 0x082efa98ec4e6c89; const ARBITRARY4: u64 = 0x452821e638d01377; const ARBITRARY5: u64 = 0xbe5466cf34e90c6c; const ARBITRARY6: u64 = 0xc0ac29b7c97c50dd; const ARBITRARY7: u64 = 0x3f84d5b5b5470917; const ARBITRARY8: u64 = 0x9216d5d98979fb1b; const ARBITRARY9: u64 = 0xd1310ba698dfb5ac; #[inline(always)] const fn folded_multiply(x: u64, y: u64) -> u64 { #[cfg(target_pointer_width = "64")] { // We compute the full u64 x u64 -> u128 product, this is a single mul // instruction on x86-64, one mul plus one mulhi on ARM64. let full = (x as u128) * (y as u128); let lo = full as u64; let hi = (full >> 64) as u64; // The middle bits of the full product fluctuate the most with small // changes in the input. This is the top bits of lo and the bottom bits // of hi. We can thus make the entire output fluctuate with small // changes to the input by XOR'ing these two halves. lo ^ hi } #[cfg(target_pointer_width = "32")] { // u64 x u64 -> u128 product is prohibitively expensive on 32-bit. // Decompose into 32-bit parts. let lx = x as u32; let ly = y as u32; let hx = (x >> 32) as u32; let hy = (y >> 32) as u32; // u32 x u32 -> u64 the low bits of one with the high bits of the other. let afull = (lx as u64) * (hy as u64); let bfull = (hx as u64) * (ly as u64); // Combine, swapping low/high of one of them so the upper bits of the // product of one combine with the lower bits of the other. afull ^ bfull.rotate_right(32) } } /// The foldhash implementation optimized for speed. pub mod fast { use super::*; pub use seed::fast::{FixedState, RandomState}; /// A [`Hasher`] instance implementing foldhash, optimized for speed. /// /// It can't be created directly, see [`RandomState`] or [`FixedState`]. #[derive(Clone)] pub struct FoldHasher { accumulator: u64, sponge: u128, sponge_len: u8, fold_seed: u64, expand_seed: u64, expand_seed2: u64, expand_seed3: u64, } impl FoldHasher { #[inline] pub(crate) fn with_seed(per_hasher_seed: u64, global_seed: &[u64; 4]) -> FoldHasher { FoldHasher { accumulator: per_hasher_seed, sponge: 0, sponge_len: 0, fold_seed: global_seed[0], expand_seed: global_seed[1], expand_seed2: global_seed[2], expand_seed3: global_seed[3], } } #[inline(always)] fn write_num>(&mut self, x: T) { let bits: usize = 8 * core::mem::size_of::(); if self.sponge_len as usize + bits > 128 { let lo = self.sponge as u64; let hi = (self.sponge >> 64) as u64; self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed); self.sponge = x.into(); self.sponge_len = bits as u8; } else { self.sponge |= x.into() << self.sponge_len; self.sponge_len += bits as u8; } } } impl Hasher for FoldHasher { #[inline(always)] fn write(&mut self, bytes: &[u8]) { let mut s0 = self.accumulator; let mut s1 = self.expand_seed; let len = bytes.len(); if len <= 16 { // XOR the input into s0, s1, then multiply and fold. 
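                // Lengths 8..=16 read two possibly overlapping 8-byte words,
                // lengths 4..=7 read two overlapping 4-byte words, and
                // lengths 1..=3 combine the first, middle and last byte, so
                // short inputs never need a per-byte loop.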
if len >= 8 { s0 ^= u64::from_ne_bytes(bytes[0..8].try_into().unwrap()); s1 ^= u64::from_ne_bytes(bytes[len - 8..].try_into().unwrap()); } else if len >= 4 { s0 ^= u32::from_ne_bytes(bytes[0..4].try_into().unwrap()) as u64; s1 ^= u32::from_ne_bytes(bytes[len - 4..].try_into().unwrap()) as u64; } else if len > 0 { let lo = bytes[0]; let mid = bytes[len / 2]; let hi = bytes[len - 1]; s0 ^= lo as u64; s1 ^= ((hi as u64) << 8) | mid as u64; } self.accumulator = folded_multiply(s0, s1); } else if len < 256 { self.accumulator = hash_bytes_medium(bytes, s0, s1, self.fold_seed); } else { self.accumulator = hash_bytes_long( bytes, s0, s1, self.expand_seed2, self.expand_seed3, self.fold_seed, ); } } #[inline(always)] fn write_u8(&mut self, i: u8) { self.write_num(i); } #[inline(always)] fn write_u16(&mut self, i: u16) { self.write_num(i); } #[inline(always)] fn write_u32(&mut self, i: u32) { self.write_num(i); } #[inline(always)] fn write_u64(&mut self, i: u64) { self.write_num(i); } #[inline(always)] fn write_u128(&mut self, i: u128) { let lo = i as u64; let hi = (i >> 64) as u64; self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed); } #[inline(always)] fn write_usize(&mut self, i: usize) { // u128 doesn't implement From. #[cfg(target_pointer_width = "32")] self.write_num(i as u32); #[cfg(target_pointer_width = "64")] self.write_num(i as u64); } #[inline(always)] fn finish(&self) -> u64 { if self.sponge_len > 0 { let lo = self.sponge as u64; let hi = (self.sponge >> 64) as u64; folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed) } else { self.accumulator } } } } /// The foldhash implementation optimized for quality. pub mod quality { use super::*; pub use seed::quality::{FixedState, RandomState}; /// A [`Hasher`] instance implementing foldhash, optimized for quality. /// /// It can't be created directly, see [`RandomState`] or [`FixedState`]. #[derive(Clone)] pub struct FoldHasher { pub(crate) inner: fast::FoldHasher, } impl Hasher for FoldHasher { #[inline(always)] fn write(&mut self, bytes: &[u8]) { self.inner.write(bytes); } #[inline(always)] fn write_u8(&mut self, i: u8) { self.inner.write_u8(i); } #[inline(always)] fn write_u16(&mut self, i: u16) { self.inner.write_u16(i); } #[inline(always)] fn write_u32(&mut self, i: u32) { self.inner.write_u32(i); } #[inline(always)] fn write_u64(&mut self, i: u64) { self.inner.write_u64(i); } #[inline(always)] fn write_u128(&mut self, i: u128) { self.inner.write_u128(i); } #[inline(always)] fn write_usize(&mut self, i: usize) { self.inner.write_usize(i); } #[inline(always)] fn finish(&self) -> u64 { folded_multiply(self.inner.finish(), ARBITRARY0) } } } /// Hashes strings >= 16 bytes, has unspecified behavior when bytes.len() < 16. fn hash_bytes_medium(bytes: &[u8], mut s0: u64, mut s1: u64, fold_seed: u64) -> u64 { // Process 32 bytes per iteration, 16 bytes from the start, 16 bytes from // the end. On the last iteration these two chunks can overlap, but that is // perfectly fine. 
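    // `unconsumed_start..unconsumed_end` below is exactly the region not yet
    // hashed by earlier iterations; once it becomes empty every byte has been
    // covered by at least one chunk and the loop stops.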
let left_to_right = bytes.chunks_exact(16); let mut right_to_left = bytes.rchunks_exact(16); for lo in left_to_right { let hi = right_to_left.next().unwrap(); let unconsumed_start = lo.as_ptr(); let unconsumed_end = hi.as_ptr_range().end; if unconsumed_start >= unconsumed_end { break; } let a = u64::from_ne_bytes(lo[0..8].try_into().unwrap()); let b = u64::from_ne_bytes(lo[8..16].try_into().unwrap()); let c = u64::from_ne_bytes(hi[0..8].try_into().unwrap()); let d = u64::from_ne_bytes(hi[8..16].try_into().unwrap()); s0 = folded_multiply(a ^ s0, c ^ fold_seed); s1 = folded_multiply(b ^ s1, d ^ fold_seed); } s0 ^ s1 } /// Hashes strings >= 16 bytes, has unspecified behavior when bytes.len() < 16. #[cold] #[inline(never)] fn hash_bytes_long( bytes: &[u8], mut s0: u64, mut s1: u64, mut s2: u64, mut s3: u64, fold_seed: u64, ) -> u64 { let chunks = bytes.chunks_exact(64); let remainder = chunks.remainder().len(); for chunk in chunks { let a = u64::from_ne_bytes(chunk[0..8].try_into().unwrap()); let b = u64::from_ne_bytes(chunk[8..16].try_into().unwrap()); let c = u64::from_ne_bytes(chunk[16..24].try_into().unwrap()); let d = u64::from_ne_bytes(chunk[24..32].try_into().unwrap()); let e = u64::from_ne_bytes(chunk[32..40].try_into().unwrap()); let f = u64::from_ne_bytes(chunk[40..48].try_into().unwrap()); let g = u64::from_ne_bytes(chunk[48..56].try_into().unwrap()); let h = u64::from_ne_bytes(chunk[56..64].try_into().unwrap()); s0 = folded_multiply(a ^ s0, e ^ fold_seed); s1 = folded_multiply(b ^ s1, f ^ fold_seed); s2 = folded_multiply(c ^ s2, g ^ fold_seed); s3 = folded_multiply(d ^ s3, h ^ fold_seed); } s0 ^= s2; s1 ^= s3; if remainder > 0 { hash_bytes_medium(&bytes[bytes.len() - remainder.max(16)..], s0, s1, fold_seed) } else { s0 ^ s1 } } foldhash-0.1.4/src/seed.rs000064400000000000000000000275211046102023000135020ustar 00000000000000use core::hash::BuildHasher; // These constants may end up unused depending on platform support. #[allow(unused)] use crate::{ARBITRARY1, ARBITRARY9}; use super::{ folded_multiply, ARBITRARY2, ARBITRARY3, ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7, ARBITRARY8, }; /// Used for FixedState, and RandomState if atomics for dynamic init are unavailable. const FIXED_GLOBAL_SEED: [u64; 4] = [ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7]; pub mod fast { use super::*; use crate::fast::FoldHasher; /// A [`BuildHasher`] for [`fast::FoldHasher`]s that are randomly initialized. #[derive(Copy, Clone, Debug)] pub struct RandomState { per_hasher_seed: u64, global_seed: global::GlobalSeed, } impl Default for RandomState { fn default() -> Self { // We initialize the per-hasher seed with the stack pointer to ensure // different threads have different seeds, with as side benefit that // stack address randomization gives us further non-determinism. let mut per_hasher_seed = 0; let stack_ptr = core::ptr::addr_of!(per_hasher_seed) as u64; per_hasher_seed = stack_ptr; // If we have the standard library available we use a thread-local // state to ensure RandomStates are different with high probability, // even if the call stack is the same. #[cfg(feature = "std")] { use std::cell::Cell; thread_local! { static PER_HASHER_NONDETERMINISM: Cell = const { Cell::new(0) }; } let nondeterminism = PER_HASHER_NONDETERMINISM.get(); per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism); PER_HASHER_NONDETERMINISM.set(per_hasher_seed); }; // If we don't have the standard library we instead use a global // atomic instead of a thread-local state. 
// // PER_HASHER_NONDETERMINISM is loaded and updated in a racy manner, // but this doesn't matter in practice - it is impossible that two // different threads have the same stack location, so they'll almost // surely generate different seeds, and provide a different possible // update for PER_HASHER_NONDETERMINISM. If we would use a proper // fetch_add atomic update then there is a larger chance of // problematic contention. // // We use usize instead of 64-bit atomics for best platform support. #[cfg(not(feature = "std"))] { use core::sync::atomic::{AtomicUsize, Ordering}; static PER_HASHER_NONDETERMINISM: AtomicUsize = AtomicUsize::new(0); let nondeterminism = PER_HASHER_NONDETERMINISM.load(Ordering::Relaxed) as u64; per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism); PER_HASHER_NONDETERMINISM.store(per_hasher_seed as usize, Ordering::Relaxed); } // One extra mixing step to ensure good random bits. per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY2); Self { per_hasher_seed, global_seed: global::GlobalSeed::new(), } } } impl BuildHasher for RandomState { type Hasher = FoldHasher; #[inline(always)] fn build_hasher(&self) -> FoldHasher { FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get()) } } /// A [`BuildHasher`] for [`fast::FoldHasher`]s that all have the same fixed seed. /// /// Not recommended unless you absolutely need determinism. #[derive(Copy, Clone, Debug)] pub struct FixedState { per_hasher_seed: u64, } impl FixedState { /// Creates a [`FixedState`] with the given seed. #[inline(always)] pub const fn with_seed(seed: u64) -> Self { // XOR with ARBITRARY3 such that with_seed(0) matches default. Self { per_hasher_seed: seed ^ ARBITRARY3, } } } impl Default for FixedState { #[inline(always)] fn default() -> Self { Self { per_hasher_seed: ARBITRARY3, } } } impl BuildHasher for FixedState { type Hasher = FoldHasher; #[inline(always)] fn build_hasher(&self) -> FoldHasher { FoldHasher::with_seed(self.per_hasher_seed, &FIXED_GLOBAL_SEED) } } } pub mod quality { use super::*; use crate::quality::FoldHasher; /// A [`BuildHasher`] for [`quality::FoldHasher`]s that are randomly initialized. #[derive(Copy, Clone, Default, Debug)] pub struct RandomState { inner: fast::RandomState, } impl BuildHasher for RandomState { type Hasher = FoldHasher; #[inline(always)] fn build_hasher(&self) -> FoldHasher { FoldHasher { inner: self.inner.build_hasher(), } } } /// A [`BuildHasher`] for [`quality::FoldHasher`]s that all have the same fixed seed. /// /// Not recommended unless you absolutely need determinism. #[derive(Copy, Clone, Default, Debug)] pub struct FixedState { inner: fast::FixedState, } impl FixedState { /// Creates a [`FixedState`] with the given seed. #[inline(always)] pub const fn with_seed(seed: u64) -> Self { Self { // We do an additional folded multiply with the seed here for // the quality hash to ensure better independence between seed // and hash. If the seed is zero the folded multiply is zero, // preserving with_seed(0) == default(). 
inner: fast::FixedState::with_seed(folded_multiply(seed, ARBITRARY8)), } } } impl BuildHasher for FixedState { type Hasher = FoldHasher; #[inline(always)] fn build_hasher(&self) -> FoldHasher { FoldHasher { inner: self.inner.build_hasher(), } } } } #[cfg(target_has_atomic = "8")] mod global { use super::*; use core::cell::UnsafeCell; use core::sync::atomic::{AtomicU8, Ordering}; fn generate_global_seed() -> [u64; 4] { let mix = |seed: u64, x: u64| folded_multiply(seed ^ x, ARBITRARY9); // Use address space layout randomization as our main randomness source. // This isn't great, but we don't advertise HashDoS resistance in the first // place. This is a whole lot better than nothing, at near zero cost with // no dependencies. let mut seed = 0; let stack_ptr = &seed as *const _; let func_ptr = generate_global_seed; let static_ptr = &GLOBAL_SEED_STORAGE as *const _; seed = mix(seed, stack_ptr as usize as u64); seed = mix(seed, func_ptr as usize as u64); seed = mix(seed, static_ptr as usize as u64); // If we have the standard library available, augment entropy with the // current time and an address from the allocator. #[cfg(feature = "std")] { #[cfg(not(any( miri, all(target_family = "wasm", target_os = "unknown"), target_os = "zkvm" )))] if let Ok(duration) = std::time::UNIX_EPOCH.elapsed() { seed = mix(seed, duration.subsec_nanos() as u64); seed = mix(seed, duration.as_secs()); } let box_ptr = &*Box::new(0u8) as *const _; seed = mix(seed, box_ptr as usize as u64); } let seed_a = mix(seed, 0); let seed_b = mix(mix(mix(seed_a, 0), 0), 0); let seed_c = mix(mix(mix(seed_b, 0), 0), 0); let seed_d = mix(mix(mix(seed_c, 0), 0), 0); // Zeroes form a weak-point for the multiply-mix, and zeroes tend to be // a common input. So we want our global seeds that are XOR'ed with the // input to always be non-zero. To also ensure there is always a good spread // of bits, we give up 3 bits of entropy and simply force some bits on. const FORCED_ONES: u64 = (1 << 63) | (1 << 31) | 1; [ seed_a | FORCED_ONES, seed_b | FORCED_ONES, seed_c | FORCED_ONES, seed_d | FORCED_ONES, ] } // Now all the below code purely exists to cache the above seed as // efficiently as possible. Even if we weren't a no_std crate and had access to // OnceLock, we don't want to check whether the global is set each time we // hash an object, so we hand-roll a global storage where type safety allows us // to assume the storage is initialized after construction. struct GlobalSeedStorage { state: AtomicU8, seed: UnsafeCell<[u64; 4]>, } const UNINIT: u8 = 0; const LOCKED: u8 = 1; const INIT: u8 = 2; // SAFETY: we only mutate the UnsafeCells when state is in the thread-exclusive // LOCKED state, and only read the UnsafeCells when state is in the // once-achieved-eternally-preserved state INIT. unsafe impl Sync for GlobalSeedStorage {} static GLOBAL_SEED_STORAGE: GlobalSeedStorage = GlobalSeedStorage { state: AtomicU8::new(UNINIT), seed: UnsafeCell::new([0; 4]), }; /// An object representing an initialized global seed. /// /// Does not actually store the seed inside itself, it is a zero-sized type. /// This prevents inflating the RandomState size and in turn HashMap's size. #[derive(Copy, Clone, Debug)] pub struct GlobalSeed { // So we can't accidentally type GlobalSeed { } within this crate. 
_no_accidental_unsafe_init: (), } impl GlobalSeed { #[inline(always)] pub fn new() -> Self { if GLOBAL_SEED_STORAGE.state.load(Ordering::Acquire) != INIT { Self::init_slow() } Self { _no_accidental_unsafe_init: (), } } #[cold] #[inline(never)] fn init_slow() { // Generate seed outside of critical section. let seed = generate_global_seed(); loop { match GLOBAL_SEED_STORAGE.state.compare_exchange_weak( UNINIT, LOCKED, Ordering::Relaxed, Ordering::Acquire, ) { Ok(_) => unsafe { // SAFETY: we just acquired an exclusive lock. *GLOBAL_SEED_STORAGE.seed.get() = seed; GLOBAL_SEED_STORAGE.state.store(INIT, Ordering::Release); return; }, Err(INIT) => return, // Yes, it's a spin loop. We need to support no_std (so no easy // access to proper locks), this is a one-time-per-program // initialization, and the critical section is only a few // store instructions, so it'll be fine. _ => core::hint::spin_loop(), } } } #[inline(always)] pub fn get(self) -> &'static [u64; 4] { // SAFETY: our constructor ensured we are in the INIT state and thus // this raw read does not race with any write. unsafe { &*GLOBAL_SEED_STORAGE.seed.get() } } } } #[cfg(not(target_has_atomic = "8"))] mod global { #[derive(Copy, Clone, Debug)] pub struct GlobalSeed {} impl GlobalSeed { #[inline(always)] pub fn new() -> Self { Self {} } #[inline(always)] pub fn get(self) -> &'static [u64; 4] { &super::FIXED_GLOBAL_SEED } } }
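// Illustrative sanity check, not part of the original crate: the comments
// above state that `with_seed(0)` matches `default()` for both the fast and
// quality `FixedState`, which can be verified through `BuildHasher::hash_one`.
#[cfg(test)]
mod tests {
    use core::hash::BuildHasher;

    #[test]
    fn with_seed_zero_matches_default() {
        assert_eq!(
            super::fast::FixedState::with_seed(0).hash_one(42u64),
            super::fast::FixedState::default().hash_one(42u64),
        );
        assert_eq!(
            super::quality::FixedState::with_seed(0).hash_one("foldhash"),
            super::quality::FixedState::default().hash_one("foldhash"),
        );
    }
}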