seahash-4.1.0/.cargo_vcs_info.json0000644000000001121400047621100124750ustar { "git": { "sha1": "94b632aeac099031c373599313d5b5f0acbbaec0" } } seahash-4.1.0/.gitignore010064400017500001750000000000241375364332200133050ustar 00000000000000/target/ Cargo.lock seahash-4.1.0/.gitlab-ci.yml010064400017500001750000000017001375364332200137530ustar 00000000000000# This file is a template, and might need editing before it works on your project. # Official language image. Look for the different tagged releases at: # https://hub.docker.com/r/library/rust/tags/ image: "rust:latest" # Optional: Pick zero or more services to be used on all builds. # Only needed when using a docker container to run your tests in. # Check out: http://docs.gitlab.com/ce/ci/docker/using_docker_images.html#what-is-a-service # services: # - mysql:latest # - redis:latest # - postgres:latest # Optional: Install a C compiler, cmake and git into the container. # You will often need this when you (or any of your dependencies) depends on C code. # before_script: # - apt-get update -yqq # - apt-get install -yqq --no-install-recommends build-essential # Use cargo to test the project test:cargo: script: - rustc --version && cargo --version # Print version info for debugging - cargo test --all --verbose --all-features seahash-4.1.0/Cargo.toml0000644000000021711400047621100105020ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. 
If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] name = "seahash" version = "4.1.0" authors = ["ticki ", "Tom Almeida "] exclude = ["target", "Cargo.lock"] description = "A blazingly fast, portable hash function with proven statistical guarantees." documentation = "https://docs.rs/seahash" keywords = ["hash", "hashing", "checksum", "checksumming", "portable"] license = "MIT" repository = "https://gitlab.redox-os.org/redox-os/seahash" [[bench]] name = "bench" harness = false [dev-dependencies.criterion] version = "0.3" [dev-dependencies.quickcheck] version = "0.9.2" [features] default = [] use_std = [] seahash-4.1.0/Cargo.toml.orig010064400017500001750000000011111400047477700142040ustar 00000000000000[package] name = "seahash" version = "4.1.0" authors = ["ticki ", "Tom Almeida "] description = "A blazingly fast, portable hash function with proven statistical guarantees." repository = "https://gitlab.redox-os.org/redox-os/seahash" documentation = "https://docs.rs/seahash" license = "MIT" keywords = ["hash", "hashing", "checksum", "checksumming", "portable"] exclude = ["target", "Cargo.lock"] [dev-dependencies] quickcheck = "0.9.2" criterion = "0.3" [features] default = [] use_std = [] [[bench]] name = "bench" harness = false seahash-4.1.0/README.md010064400017500001750000000007441375364332200126050ustar 00000000000000
Logo
=================== SeaHash: A bizarrely fast hash function. SeaHash is a hash function with performance better than (around 3-20% improvement) xxHash and MetroHash. Furthermore, SeaHash has mathematically provable statistical guarantees. In action: [![The hash function in action.](http://ticki.github.io/img/seahash_construction_diagram.svg)](http://ticki.github.io/img/seahash_construction_diagram.svg) seahash-4.1.0/benches/bench.rs010064400017500001750000000051201400047463300143440ustar 00000000000000extern crate core; extern crate criterion; extern crate seahash; use core::hash::Hasher; use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; fn describe_benches(c: &mut Criterion) { // shared buffers for all tests let buf = vec![15; 16 * 1024]; // shared/n and buffer/n are executed for these sizes let sizes = [64, 1024, 4096, 16 * 1024]; let mut group = c.benchmark_group("buffer"); for size in &sizes { group.throughput(Throughput::Bytes(*size as u64)); group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { b.iter(|| { black_box(seahash::hash(&buf[..size])); }) }); } group.finish(); let mut group = c.benchmark_group("stream"); for size in &sizes { group.throughput(Throughput::Bytes(*size as u64)); group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { b.iter_with_setup( || seahash::SeaHasher::default(), |mut h: seahash::SeaHasher| { // use chunks of 32 bytes to simulate some looping on a single hasher value for _ in 0..size / 32 { h.write(&buf[..32]); } // this will mostly be an empty slice, but that is a possible Hasher api usage h.write(&buf[..(size % 32)]); black_box(h.finish()) }, ) }); } group.finish(); // gigabyte group times are comparable with earlier benchmark values based on // d52d115a223a0e81d1600bd8a5e73cb4b24a38c0 let mut group = c.benchmark_group("gigabyte"); group.throughput(Throughput::Bytes((1024 * 1024 * 1024) as u64)); 
group.bench_function(BenchmarkId::from_parameter("buffer"), |b| { b.iter(|| { let mut buf = [15; 4096]; let mut total = 0; for _ in 0..250_000 { total ^= seahash::hash(&buf); buf[0] = buf[0].wrapping_add(1); } black_box(total) }) }); group.bench_function(BenchmarkId::from_parameter("stream"), |b| { b.iter(|| { let mut buf = [15; 4096]; let mut h = seahash::SeaHasher::default(); for _ in 0..250_000 { h.write(&buf); buf[0] = buf[0].wrapping_add(1); } black_box(h.finish()) }) }); group.finish(); } criterion_group!(benches, describe_benches); criterion_main!(benches); seahash-4.1.0/logo.png010064400017500001750000000400731375364332200127730ustar 00000000000000PNG  IHDR\YbKGD pHYs   IDATxwU3d`κ"׼Ӫ5ft kkZ]֜0gE1b #4qYf]Uν 8NjPUпQ XS}qk m>5|f#:`u㔮w, AY̡@b}V &Loyq18N ?X3[g[T}bqZ-/j?)q18N" .=>< s,~^.rbq@/l`+GF.}pOq\ 8Ò5%:ݟp' +SՑŀ8}V"jvTyŀ 3;;,P%߆ r 0 SSUu[bŀTX8K TL,//h`^xO ,%'16 plu讘@ϔN;;C5NH\V1jdőXn}Nmm{T}ƛq2љ;a[]R87Aܩcʽ Uuy툇eK|Qm|)"T=_punryn>V9aS`V@ "Vx=ឮɠn’qvYtbqJ#"@N Uunk+,#28AUpt2d罀mEP}Uu)NpV/ת "(!WUuko9'# p0X|.t:p Vo_`0 x0I7ka7ZS;%nG;%:t.sܧz >0V; 2-蔨X^±@W;s4nS_usZ8USKI 28y//Mn+X-\0_N%[qyxŀb{m8,JPr8lU}ߛ)+cO.WUu189}\U-[Pތ(`QS lk  Tƾng " ^Bv!PxVOF.x9E[ZՀ~bSBŀQkEd]ow OU<󼅋›)8DYIXq6Q%6t#E@0x.عtR)lNǗj`7R$ \e_p:j_̱<4,LhmU˅@ 0_#| 80ǿ8B`>f!nĎLtӱqVX&9޺PૈMh9"%bzpvU2h@,{5U)2Hx:~[2Ddr &z9)5bzj20KTyg{$Z`5~~}[$y 8$9M,Qmo2'űfG fVœ(`K&m֋v9.穾r?hs)Us ;)!<v!9FQR\Nc [$QOW8NC)"~ǩN:[6>ݍcW4}8FUGqY"v@oO쐭DPUG )keKHNį agY87:M`/U>kPǜ-"J9s"AMxVM?Ump|s,X\t29jEd/IyOj Yň(bgb>Fmc:8]UolkϤπN44 X_DT ne骱=dMiUsʎ X\ts1PB0Dm.nlϲ;sv\ˊC.ox:M0hN""yRQBFzAL18rzm{rOQ͵l:] O!08ƅ UmD ꊧ%4ғ{UٳVVesfuQPDq{ʹ "Gq]Da"^@;oܓ} p)sx9V eՈH_,i9X`pb鄟=ljXlY7] TT%"Jpeq:T'b`Ѷ ۻfMD~9#1:ܛIb7WY 6iX@_ \T*+e!Tu"w\>%݌3V 8UCހ"^.zljzvU^jUU el\mi?Q`U[5wwgTuA:T$5c}oQ,YNT45?|g7Yfƭ0?<՜&Ue, , S܍%{G{lxU}˅|C40e'B'*߀E d6YYU+Dh;YλAj|`Q՛\8%—@[\h͕qkYA/2upe me! ѓD,h)"G) c-r uJWt b: F Txx;w[Gj5bw E,^?:8TUSoe0-"қ$ǯbZHʂr|Ȫ:_DVpVs]$gjV C }LUv20ԋwrU0[ƱM%8NHUrj2~/cQh #"G{7iTgˣ1?)O3O{+;+3:1UȶXtQnBŀNtxH<]:PG,Uw}.V\jpȺ'Sq|q,V iq[ lXW^I=+EwY[D. 
3+UR8V0ʘ7WI6ˀ;9ܟ |@PC?`KU^XBK0?498B\&?Sj{Gc5!28LUUlU%~ >jRX\j6XտRz'l>`q* X!/.Rܩ?V ǻ'JnǚJiU˱*y<ppomleZ+t)FVGnUCSɨ<y'9^-8s ;nV_+-k*OKMpe<`p9]L!ªF0 Ķ FYzEU!q~@DekkHm,쉅.X2YT{,!@ucÀ[Q:&3)*VVB%;E$] Q9t<~ﭖwoU\bN7oM׍X4j>{0V"ƫΘ3c;b64c/Q |^uhir c)%ۦ=4,LZoMMXun)vvWcw?ot2MB[:]k Mpwp/1\RGbsQ(9 W׼5] D1=_n>XyoTu[7ZMwǩloK-|2ɶE_ b ᭁY9Uҟ5n<e5BUVԮ1g$9CX /=!Zx/x +nT )~OqRE`#Wuv곱D@D[qYas~-+`-H٪o>^E&mTrƻm?_qA\e}e Ŝ-%~#ױc:NAx&XbWNJ')68FUBj{̋`gle ]Ld8E#Vr1 Wnh;v.VLx灓| @1 ?pVp<7 h,c]`'rf*\bmi"<&Bc5c@q ׊a`K_DF_c L~& qcX5(BGVt' U,y4bm"kc5a BK0i!cT|,8l {)q۪,!VR؛Ta] -T j@~a,>H7^U' [+aYcA(?EUx:NƁ!D@ RAʹ0H? ʀ8NI|k=yXޏ=*]R IxYXnm.qJWmyzouU鯁ۀ<\ŀ8N^-G##0PUǗQcQbW28GUvJ*oqQչSpίRe ;`S lBŀ8N+XE\$yp}Ȯ"8^%x;VU'E&p),Y<[ ``+rlj0G^m}q <G xl_,rp`Ҝ T K塘P{,Xž_m pp'[s18S["aہKUul3_o'`Ww@8Sb IH q @DC)ެ8}1988Y"RU LB88NW n T{:8NvX\hIm1GB'%DMqr@l;uoBM1gΏʡ2bV@ P9agbe?߅LU9nQ{ꋌW]C }afQa .WU`K``Oaam4#:b@UUDZLa#o#Y#t .XMsD QUv e}p"|z|QD+,[s*d/ n~a%(-çXc1ߙ""Cϳ."݁CM4_&{]큓"p@jbRU\V|.X9 |+pT[O Lm;@ޗ3$V ?= ``mg/nbp|Psu.Di84]ژ"ɧE3N\MDn>$y\5"ҥ@DdsСv"Yt m6lYqm0Z;eݖƳ٦alsyJDɀM |emo11 ~>2HV "] CW0o@WiQ l ""6ڈ%+$w6ϗl0DD~_6**ω"ȉ92,%="/9 8xsBjDa"0R-%"Ŗu_tF}q}IwHa'ʮppYx"~&qQ{Edf5VؒWm3_NKC>jXS/)8GDdJ\)Հ>h \c%j1%' FbC`Jo3 cT 7USV5Yמb_"2fY"O+gJF$O\N+6X, = ˀ5cv+G# M%)qX.x`gDM-C,_2{('a!P?Sm0*H9ZxmI]݌\u}":/\{-LXX[2\t¢~T'>dZ .XbMTc+17 ব-NΘqMIMR/"c 5^/"/竆?U5suBnr^~EUEX#I/h0V-Ɩ7gTuzgp<XRKscRneF"ҡ9.$ & 8\U/ 3#Tux*,%1.f&ptxHyՋcqx(TGys0Ore"t Sr,I.~~x2ᱫ46)J 굘ͻ zV!:[pix/ >X̓4 tg>y_-2sB\-ײ$tY E㔓 &yu'/VB|$YHW 4U-3-uj̇H ^54 ੰKNx!q*C$ћ쎵ߣ~NIUP&鋁.n^9e̛_ 88@[T3"b0.jGS$qM#E8T76TչoU7}"Vhv >BG'ď"GS"g3=fWqdj "[y1t 5ƘU2uSձzn%p`h`V%b%DžWj@"aL0gHZc kU SVi"F9X.y"r^l+":RhXVO- 1ciOu|@ Kh4߭@#9(|:rXI/7FVyg2ԥ1]YT >s%s:50CDVmŶ-4]z^V . Y=|Edgy1 . kp%m|pnl9|%"HX tU=?W)r[`RE#Iʄ z叅gY)p-Wa΄%0AX*n.[y8Fr? 
ΐD Ƽ;nSRO+Kl /"C~@0q< l<@9Im$"= 8nMceyY֟%5J kaK+`{ai[V6rw10K Hf*"I"2Ģ'q`[K1+D2;6:HW9xs  %JFP䕄uҌ^ S\qplZ$[]m  |&"V"RuUȮa·~S,TѨ5;=\-|mxX"2>#|bŅڄRCB;'> <_~sʿ:|&KXXR u*X$}DB-_j'qRbݸcY #Çmp+W£3ZD"ތ9{b}t"*C `K͕Us08UTUE`Ol$_cY!+VDP<)bQ[q%u 0]D>X=yXY pZX;o T&n) EY81|0&0MMa5l:QĀ^#"RܘRa6p~WP1"r!ԘtUuZ6/"7FNaccƎ`b^򲜝R,Z`)_aNDB'y$d{jYP}"N鑠yvN36[s3C/KywZ+)Q%8Y!BR\TN782cP>|IUg *m06W{c/M(:^e*Uز)ay&dH\ b; &Y2&q,0VehVo |BF9w,-E&a0 m;i>p);VՅRcId6x ǜ o˘XB"\ ,buz1K1 3PUk̫ \Xer"V 9G>$T plmuQ  dUŭ_M6 `12j f{j~f'꽅ɝY$3nza2X/Xs %2z*"Jߧ`pSLm=sQ t&:3,ci~89&̬ 6J&bTgȥpw11NU+bI6J~+q_5DU2m=AN^Pe0yp oT3}(hA6FQضXr\UQ n$2e:+{!;- lЀ4\Uws_B^3t|U00lҢ6~TSeb\,,W[lԤd i%04^+v)X~wJm a[*Ͻ )+5%&zxEҸcDl`W,FX|}B;}Yq̫cRۄqjcO bۼo6{Wx)=B}m:iyÖꦅX1o-K*^4-VA-kh0oπof"*˵C; "n]H{cUo`M`0VRݷNJ 0OVvdž}8++YsMbIENDB`seahash-4.1.0/src/buffer.rs010064400017500001750000000322611400047463300137240ustar 00000000000000//! A highly optimized version of SeaHash. use std::slice; use helper; /// A SeaHash state. #[derive(Clone)] pub struct State { /// `a` a: u64, /// `b` b: u64, /// `c` c: u64, /// `d` d: u64, /// The number of written bytes. written: u64, } impl State { /// Create a new state vector with some initial values. pub fn new(a: u64, b: u64, c: u64, d: u64) -> State { State { a: a, b: b, c: c, d: d, written: 0, } } /// Hash a buffer with some seed. pub fn hash(buf: &[u8], (mut a, mut b, mut c, mut d): (u64, u64, u64, u64)) -> State { unsafe { // We use 4 different registers to store seperate hash states, because this allows us // to update them seperately, and consequently exploiting ILP to update the states in // parallel. // The pointer to the current bytes. let mut ptr = buf.as_ptr(); // The end of the "main segment", i.e. the biggest buffer s.t. the length is divisible // by 32. let end_ptr = buf.as_ptr().offset(buf.len() as isize & !0x1F); while end_ptr > ptr { // Modern CPUs allow the pointer arithmetic to be done in place, hence not // introducing tmpvars. 
a ^= helper::read_u64(ptr); b ^= helper::read_u64(ptr.offset(8)); c ^= helper::read_u64(ptr.offset(16)); d ^= helper::read_u64(ptr.offset(24)); // Increment the pointer. ptr = ptr.offset(32); // Diffuse the updated registers. We hope that each of these are executed in // parallel. a = helper::diffuse(a); b = helper::diffuse(b); c = helper::diffuse(c); d = helper::diffuse(d); } // Calculate the number of excessive bytes. These are bytes that could not be handled // in the loop above. let mut excessive = buf.len() as usize + buf.as_ptr() as usize - end_ptr as usize; // Handle the excessive bytes. match excessive { 0 => {} 1..=7 => { // 1 or more excessive. // Write the last excessive bytes (<8 bytes). a ^= helper::read_int(slice::from_raw_parts(ptr as *const u8, excessive)); // Diffuse. a = helper::diffuse(a); } 8 => { // 8 bytes excessive. // Mix in the partial block. a ^= helper::read_u64(ptr); // Diffuse. a = helper::diffuse(a); } 9..=15 => { // More than 8 bytes excessive. // Mix in the partial block. a ^= helper::read_u64(ptr); // Write the last excessive bytes (<8 bytes). excessive = excessive - 8; b ^= helper::read_int(slice::from_raw_parts(ptr.offset(8), excessive)); // Diffuse. a = helper::diffuse(a); b = helper::diffuse(b); } 16 => { // 16 bytes excessive. // Mix in the partial block. a = helper::diffuse(a ^ helper::read_u64(ptr)); b = helper::diffuse(b ^ helper::read_u64(ptr.offset(8))); } 17..=23 => { // 16 bytes or more excessive. // Mix in the partial block. a ^= helper::read_u64(ptr); b ^= helper::read_u64(ptr.offset(8)); // Write the last excessive bytes (<8 bytes). excessive = excessive - 16; c ^= helper::read_int(slice::from_raw_parts(ptr.offset(16), excessive)); // Diffuse. a = helper::diffuse(a); b = helper::diffuse(b); c = helper::diffuse(c); } 24 => { // 24 bytes excessive. // Mix in the partial block. a ^= helper::read_u64(ptr); b ^= helper::read_u64(ptr.offset(8)); c ^= helper::read_u64(ptr.offset(16)); // Diffuse. 
a = helper::diffuse(a); b = helper::diffuse(b); c = helper::diffuse(c); } _ => { // More than 24 bytes excessive. // Mix in the partial block. a ^= helper::read_u64(ptr); b ^= helper::read_u64(ptr.offset(8)); c ^= helper::read_u64(ptr.offset(16)); // Write the last excessive bytes (<8 bytes). excessive = excessive - 24; d ^= helper::read_int(slice::from_raw_parts(ptr.offset(24), excessive)); // Diffuse. a = helper::diffuse(a); b = helper::diffuse(b); c = helper::diffuse(c); d = helper::diffuse(d); } } } State { a: a, b: b, c: c, d: d, written: buf.len() as u64, } } /// Write another 64-bit integer into the state. pub fn push(&mut self, x: u64) { // Mix `x` into `a`. let a = helper::diffuse(self.a ^ x); // Rotate around. // _______________________ // | v // a <---- b <---- c <---- d self.a = self.b; self.b = self.c; self.c = self.d; self.d = a; // Increase the written bytes counter. self.written += 8; } /// Remove the most recently written 64-bit integer from the state. /// /// Given the value of the most recently written u64 `last`, remove it from the state. pub fn pop(&mut self, last: u64) { // Un-mix `last` from `d`. Removes the recently written data. let d = helper::undiffuse(self.d) ^ last; // Rotate back. // _______________________ // v | // a ----> b ----> c ----> d self.d = self.c; self.c = self.b; self.b = self.a; self.a = d; // Decrese the written bytes counter. self.written -= 8; } /// Finalize the state. #[inline] pub fn finalize(self) -> u64 { let State { written, mut a, b, mut c, d, } = self; // XOR the states together. Even though XOR is commutative, it doesn't matter, because the // state vector's initial components are mutually distinct, and thus swapping even and odd // chunks will affect the result, because it is sensitive to the initial condition. a ^= b; c ^= d; a ^= c; // XOR the number of written bytes in order to make the excessive bytes zero-sensitive // (without this, two excessive zeros would be equivalent to three excessive zeros). 
This // is know as length padding. a ^= written; // We diffuse to make the excessive bytes discrete (i.e. small changes shouldn't give small // changes in the output). helper::diffuse(a) } } /// Hash some buffer. /// /// This is a highly optimized implementation of SeaHash. It implements numerous techniques to /// improve performance: /// /// - Register allocation: This makes a great deal out of making sure everything fits into /// registers such that minimal memory accesses are needed. This works quite successfully on most /// CPUs, and the only time it reads from memory is when it fetches the data of the buffer. /// - Bulk reads: Like most other good hash functions, we read 8 bytes a time. This obviously /// improves performance a lot /// - Independent updates: We make sure very few statements next to each other depends on the /// other. This means that almost always the CPU will be able to run the instructions in parallel. /// - Loop unrolling: The hot loop is unrolled such that very little branches (one every 32 bytes) /// are needed. /// /// and more. /// /// The seed of this hash function is prechosen. pub fn hash(buf: &[u8]) -> u64 { hash_seeded( buf, 0x16f11fe89b0d677c, 0xb480a793d8e6c86c, 0x6fe2e5aaf078ebc9, 0x14f994a4c5259381, ) } /// Hash some buffer according to a chosen seed. /// /// The keys are expected to be chosen from a uniform distribution. The keys should be mutually /// distinct to avoid issues with collisions if the lanes are permuted. /// /// This is not secure, as [the key can be extracted with a bit of computational /// work](https://github.com/ticki/tfs/issues/5), as such, it is recommended to have a fallback /// hash function (adaptive hashing) in the case of hash flooding. It can be considered unbroken if /// the output is not known (i.e. no malicious party has access to the raw values of the keys, only /// a permutation thereof).), however I absolutely do not recommend using it for this. 
If you want /// to be strict, this should only be used as a layer of obfuscation, such that the fallback (e.g. /// SipHash) is harder to trigger. /// /// In the future, I might strengthen the security if possible while having backward compatibility /// with the default initialization vector. pub fn hash_seeded(buf: &[u8], a: u64, b: u64, c: u64, d: u64) -> u64 { State::hash(buf, (a, b, c, d)).finalize() } #[cfg(test)] mod tests { use super::*; use reference; fn hash_match(a: &[u8]) { assert_eq!(hash(a), reference::hash(a)); assert_eq!( hash_seeded(a, 1, 1, 1, 1), reference::hash_seeded(a, 1, 1, 1, 1) ); assert_eq!( hash_seeded(a, 500, 2873, 2389, 9283), reference::hash_seeded(a, 500, 2873, 2389, 9283) ); assert_eq!( hash_seeded(a, 238945723984, 872894734, 239478243, 28937498234), reference::hash_seeded(a, 238945723984, 872894734, 239478243, 28937498234) ); assert_eq!( hash_seeded(a, !0, !0, !0, !0), reference::hash_seeded(a, !0, !0, !0, !0) ); assert_eq!( hash_seeded(a, 0, 0, 0, 0), reference::hash_seeded(a, 0, 0, 0, 0) ); } #[test] #[cfg_attr(miri, ignore)] // very slow to run on miri fn zero() { let arr = [0; 4096]; for n in 0..4096 { hash_match(&arr[0..n]); } } #[test] fn seq() { let mut buf = [0; 4096]; for i in 0..4096 { buf[i] = i as u8; } hash_match(&buf); } #[test] fn position_depedent() { let mut buf1 = [0; 4098]; for i in 0..4098 { buf1[i] = i as u8; } let mut buf2 = [0; 4098]; for i in 0..4098 { buf2[i] = i as u8 ^ 1; } assert!(hash(&buf1) != hash(&buf2)); } #[test] fn shakespear() { hash_match(b"to be or not to be"); hash_match(b"love is a wonderful terrible thing"); } #[test] fn zero_senitive() { assert_ne!(hash(&[1, 2, 3, 4]), hash(&[1, 0, 2, 3, 4])); assert_ne!(hash(&[1, 2, 3, 4]), hash(&[1, 0, 0, 2, 3, 4])); assert_ne!(hash(&[1, 2, 3, 4]), hash(&[1, 2, 3, 4, 0])); assert_ne!(hash(&[1, 2, 3, 4]), hash(&[0, 1, 2, 3, 4])); assert_ne!(hash(&[0, 0, 0]), hash(&[0, 0, 0, 0, 0])); } #[test] fn not_equal() { assert_ne!(hash(b"to be or not to be "), hash(b"to 
be or not to be")); assert_ne!(hash(b"jkjke"), hash(b"jkjk")); assert_ne!(hash(b"ijkjke"), hash(b"ijkjk")); assert_ne!(hash(b"iijkjke"), hash(b"iijkjk")); assert_ne!(hash(b"iiijkjke"), hash(b"iiijkjk")); assert_ne!(hash(b"iiiijkjke"), hash(b"iiiijkjk")); assert_ne!(hash(b"iiiiijkjke"), hash(b"iiiiijkjk")); assert_ne!(hash(b"iiiiiijkjke"), hash(b"iiiiiijkjk")); assert_ne!(hash(b"iiiiiiijkjke"), hash(b"iiiiiiijkjk")); assert_ne!(hash(b"iiiiiiiijkjke"), hash(b"iiiiiiiijkjk")); assert_ne!(hash(b"ab"), hash(b"bb")); } #[test] fn push() { let mut state = State::new(1, 2, 3, 4); state.push(!0); state.push(0); assert_eq!( hash_seeded( &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0], 1, 2, 3, 4 ), state.finalize() ); } #[test] fn pop() { let mut state = State::new(1, 2, 3, 4); state.push(!0); state.push(0); state.pop(0); assert_eq!( hash_seeded( &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF], 1, 2, 3, 4 ), state.finalize() ); } } seahash-4.1.0/src/helper.rs010064400017500001750000000106151400047463300137310ustar 00000000000000//! Helper functions. /// Read a buffer smaller than 8 bytes into an integer in little-endian. /// /// This assumes that `buf.len() < 8`. If this is not satisfied, the behavior is unspecified. #[inline(always)] pub fn read_int(buf: &[u8]) -> u64 { // Because we want to make sure that it is register allocated, we fetch this into a variable. // It will likely make no difference anyway, though. let ptr = buf.as_ptr(); unsafe { // Break it down to reads of integers with widths in total spanning the buffer. This minimizes // the number of reads match buf.len() { // u8. 1 => *ptr as u64, // u16. 2 => (ptr as *const u16).read_unaligned().to_le() as u64, // u16 + u8. 3 => { let a = (ptr as *const u16).read_unaligned().to_le() as u64; let b = *ptr.offset(2) as u64; a | (b << 16) } // u32. 4 => (ptr as *const u32).read_unaligned().to_le() as u64, // u32 + u8. 
5 => { let a = (ptr as *const u32).read_unaligned().to_le() as u64; let b = *ptr.offset(4) as u64; a | (b << 32) } // u32 + u16. 6 => { let a = (ptr as *const u32).read_unaligned().to_le() as u64; let b = (ptr.offset(4) as *const u16).read_unaligned().to_le() as u64; a | (b << 32) } // u32 + u16 + u8. 7 => { let a = (ptr as *const u32).read_unaligned().to_le() as u64; let b = (ptr.offset(4) as *const u16).read_unaligned().to_le() as u64; let c = *ptr.offset(6) as u64; a | (b << 32) | (c << 48) } _ => 0, } } } /// Read a little-endian 64-bit integer from some buffer. #[inline(always)] pub unsafe fn read_u64(ptr: *const u8) -> u64 { #[cfg(target_pointer_width = "32")] { // We cannot be sure about the memory layout of a potentially emulated 64-bit integer, so // we read it manually. If possible, the compiler should emit proper instructions. let a = (ptr as *const u32).read_unaligned().to_le(); let b = (ptr.offset(4) as *const u32).read_unaligned().to_le(); a as u64 | ((b as u64) << 32) } #[cfg(target_pointer_width = "64")] { (ptr as *const u64).read_unaligned().to_le() } } /// The diffusion function. /// /// This is a bijective function emitting chaotic behavior. Such functions are used as building /// blocks for hash functions. pub const fn diffuse(mut x: u64) -> u64 { // These are derived from the PCG RNG's round. Thanks to @Veedrac for proposing this. The basic // idea is that we use dynamic shifts, which are determined by the input itself. The shift is // chosen by the higher bits, which means that changing those flips the lower bits, which // scatters upwards because of the multiplication. x = x.wrapping_mul(0x6eed0e9da4d94a4f); let a = x >> 32; let b = x >> 60; x ^= a >> b; x = x.wrapping_mul(0x6eed0e9da4d94a4f); x } /// Reverse the `diffuse` function. pub const fn undiffuse(mut x: u64) -> u64 { // 0x2f72b4215a3d8caf is the modular multiplicative inverse of the constant used in `diffuse`. 
x = x.wrapping_mul(0x2f72b4215a3d8caf); let a = x >> 32; let b = x >> 60; x ^= a >> b; x = x.wrapping_mul(0x2f72b4215a3d8caf); x } #[cfg(test)] mod tests { use super::*; fn diffuse_test(x: u64, y: u64) { assert_eq!(diffuse(x), y); assert_eq!(x, undiffuse(y)); assert_eq!(undiffuse(diffuse(x)), x); } #[test] fn read_int_() { assert_eq!(read_int(&[2, 3]), 770); assert_eq!(read_int(&[3, 2]), 515); assert_eq!(read_int(&[3, 2, 5]), 328195); } #[test] fn read_u64_() { unsafe { assert_eq!(read_u64([1, 0, 0, 0, 0, 0, 0, 0].as_ptr()), 1); assert_eq!(read_u64([2, 1, 0, 0, 0, 0, 0, 0].as_ptr()), 258); } } #[test] fn diffuse_test_vectors() { diffuse_test(94203824938, 17289265692384716055); diffuse_test(0xDEADBEEF, 12110756357096144265); diffuse_test(0, 0); diffuse_test(1, 15197155197312260123); diffuse_test(2, 1571904453004118546); diffuse_test(3, 16467633989910088880); } } seahash-4.1.0/src/impl_std.rs010064400017500001750000000014761400047463300142720ustar 00000000000000use crate::SeaHasher; use std::hash::Hasher; use std::io; impl io::Write for SeaHasher { fn write(&mut self, buf: &[u8]) -> io::Result { Hasher::write(self, buf); Ok(buf.len()) } fn flush(&mut self) -> io::Result<()> { Ok(()) } } #[cfg(test)] mod tests { use super::*; #[test] fn hash_write_trait() { let reader: &[u8] = &[ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ]; let mut hasher = SeaHasher::new(); // io::copy consumes the mutable reader -> cloning the buffer let _ = io::copy(&mut reader.clone(), &mut hasher).unwrap(); let hash = hasher.finish(); let control = crate::hash(&reader); assert_eq!(control, hash); } } seahash-4.1.0/src/lib.rs010064400017500001750000000200421400047463300132130ustar 00000000000000//! SeaHash: A blazingly fast, portable hash function with proven statistical guarantees. //! //! SeaHash is a hash function with performance better than (around 3-20% improvement) xxHash and //! MetroHash. 
Furthermore, SeaHash has mathematically provable statistical guarantees. //! //! SeaHash is a portable hash function, meaning that the output is not dependent on the hosting //! architecture, and makes no assumptions on endianness or the alike. This stable layout allows it //! to be used for on-disk/permanent storage (e.g. checksums). //! //! # Design, advantages, and features //! //! - **High quality**: It beats most other general purpose hash functions because it provides full //! avalanche inbetween state updates. //! - **Performance**: SeaHash beats every high-quality (grading 10/10 in smhasher) hash function //! that I know of. //! - **Provable quality guarantees**: Contrary to most other non-cryptographic hash function, //! SeaHash can be proved to satisfy the avalanche criterion as well as BIC. //! - **Parallelizable**: Consists of multiple, independent states to take advantage of ILP and/or //! software threads. //! - **Bulk reads**: Reads 8 or 4 bytes a time. //! - **Stable and portable**: Does not depend on the target architecture, and produces a stable //! value, which is only changed in major version bumps. //! - **Keyed**: Designed to not leak the seed/key. Note that it has not gone through //! cryptoanalysis yet, so the keyed version shouldn't be relied on when security is needed. //! - **Hardware accelerateable**: SeaHash is designed such that ASICs can implement it with really //! high performance. //! //! # A word of warning! //! //! This is **not** a cryptographic function, and it certainly should not be used as one. If you //! want a good cryptographic hash function, you should use SHA-3 (Keccak) or BLAKE2. //! //! It is not secure, nor does it aim to be. It aims to have high quality pseudorandom output and //! few collisions, as well as being fast. //! //! # Benchmark //! //! On normal hardware, it is expected to run with a rate around 5.9-6.7 GB/S on a 2.5 GHz CPU. //! Further improvement can be seen when hashing very big buffers in parallel. 
//! //! | Function | Quality | Cycles per byte (lower is better) | Author //! |-------------|---------------|-----------------------------------|------------------- //! | **SeaHash** | **Excellent** | **0.24** | **Ticki** //! | xxHash | Excellent | 0.31 | Collet //! | MetroHash | Excellent | 0.35 | Rogers //! | Murmur | Excellent | 0.64 | Appleby //! | Rabin | Medium | 1.51 | Rabin //! | CityHash | Excellent | 1.62 | Pike, Alakuijala //! | LoseLose | Terrible | 2.01 | Kernighan, Ritchie //! | FNV | Poor | 3.12 | Fowler, Noll, Vo //! | SipHash | Pseudorandom | 3.21 | Aumasson, Bernstein //! | CRC | Good | 3.91 | Peterson //! | DJB2 | Poor | 4.13 | Bernstein //! //! ## Ideal architecture //! //! SeaHash is designed and optimized for the most common architecture in use: //! //! - Little-endian //! - 64-bit //! - Cache lines of 64 bytes or more //! - 4 or more instruction pipelines //! - 4 or more 64-bit registers //! //! Anything that does not meet the above requirements will perform worse by up to 30-40%. Note that //! this means it is still faster than CityHash (~1 GB/S), MurmurHash (~2.6 GB/S), FNV (~0.5 GB/S), //! etc. //! //! # Achieving the performance //! //! Like any good general-purpose hash function, SeaHash reads 8 bytes at once, effectively reducing //! the running time by a factor of ~5. //! //! Secondly, SeaHash achieves the performance by heavily exploiting Instruction-Level Parallelism. //! In particular, it fetches 4 integers in every round and independently diffuses them. This //! yields four different states, which are finally combined. //! //! # Statistical guarantees //! //! SeaHash comes with certain proven guarantees about the statistical properties of the output: //! //! 1. Pick some _n_-byte sequence, _s_. The number of _n_-byte sequences colliding with _s_ is //! independent of the choice of _s_ (all equivalence classes have equal size). //! 2. If you flip any bit in the input, the probability for any bit in the output to be flipped is //! 0.5. //!
3. The hash value of a sequence of uniformly distributed bytes is itself uniformly distributed. //! //! The first guarantee can be derived through deduction, by proving that the diffusion function is //! bijective (reverse the XORs and find the congruence inverses to the primes). //! //! The second guarantee requires more complex calculations: Construct a matrix of probabilities //! and set one to certain (1), then apply transformations through the respective operations. The //! proof is a bit long, but relatively simple. //! //! The third guarantee requires proving that the hash value is a tree, such that: //! - Leaves represent the input values. //! - Single-child nodes reduce to the diffusion of the child. //! - Multiple-child nodes reduce to the sum of the children. //! //! Then simply show that each of these reductions transforms uniformly distributed variables to //! uniformly distributed variables. //! //! # Inner workings //! //! In technical terms, SeaHash follows an alternating 4-state length-padded Merkle–Damgård //! construction with an XOR-diffuse compression function (click to enlarge): //! //! [![A diagram.](http://ticki.github.io/img/seahash_construction_diagram.svg)] //! (http://ticki.github.io/img/seahash_construction_diagram.svg) //! //! It starts with 4 initial states, then it alternates between them (increment, wrap on 4) and //! does XOR with the respective block. When a state has been visited the diffusion function (f) is //! applied. The very last block is padded with zeros. //! //! After all the blocks have been gone over, all the states are XOR'd together with the number of bytes //! written. The sum is then passed through the diffusion function, which produces the final hash //! value. //! //! The diffusion function is drawn below. //! //! ```notest //! x ← px //! x ← x ⊕ ((x ≫ 32) ≫ (x ≫ 60)) //! x ← px //! ``` //! //! The advantage of having four completely segregated (note that there is no mix round, so they're //!
entirely independent) states is that fast parallelism is possible. For example, if I were to //! hash 1 TB, I could spawn four threads which can run independently without _any_ //! intercommunication or synchronization before the last round. //! //! If the diffusion function (f) was cryptographically secure, it would pass cryptanalysis //! trivially. This might seem irrelevant, as it clearly isn't cryptographically secure, but it //! tells us something about the inner semantics. In particular, any diffusion function with //! sufficient statistical quality will make up a good hash function in this construction. //! //! Read [the blog post](http://ticki.github.io/blog/seahash-explained/) for more details. //! //! # ASIC version //! //! SeaHash is specifically designed such that it can be efficiently implemented in the form of //! an ASIC while only using very few transistors. //! //! # Specification //! //! See the [`reference`](./reference) module. //! //! # Credits //! //! Aside from myself (@ticki), there are a couple of other people who have helped create this. //! Joshua Landau suggested using the [PCG family of diffusions](http://www.pcg-random.org/), //! created by Melissa E. O'Neill. Sokolov Yura spotted multiple bugs in SeaHash. #![warn(missing_docs)] #![cfg_attr(all(not(test), not(feature = "use_std")), no_std)] #[cfg(all(not(test), not(feature = "use_std")))] extern crate core as std; pub use buffer::{hash, hash_seeded, State}; pub use stream::SeaHasher; mod buffer; mod helper; pub mod reference; mod stream; #[cfg(feature = "use_std")] mod impl_std; seahash-4.1.0/src/reference.rs010064400017500001750000000103271400047463300144100ustar 00000000000000//! A slow, but clear reference implementation of SeaHash. //! //! # Specification //! //! The input buffer is padded with null bytes until the length is divisible by 8. //! //! We start out with state //! //! ```notest //! a = 0x16f11fe89b0d677c //! b = 0xb480a793d8e6c86c //! c = 0x6fe2e5aaf078ebc9 //!
/// Decode up to eight bytes as a little-endian unsigned integer.
///
/// The byte at index 0 is the least significant one. A slice shorter than eight bytes leaves the
/// upper bytes of the result zero, and an empty slice yields 0. Debug builds assert that `int`
/// holds at most 8 bytes.
fn read_int(int: &[u8]) -> u64 {
    debug_assert!(
        int.len() <= 8,
        "The buffer length of the integer must be less than or equal to \
         the one of an u64."
    );

    // Walk from the most significant byte down: make room for each byte, then OR it in.
    int.iter()
        .rev()
        .fold(0u64, |acc, &byte| (acc << 8) | byte as u64)
}
^ total as u64, ) } /// Create a new state with some initial values (seed). fn with_seeds(k1: u64, k2: u64, k3: u64, k4: u64) -> State { State { // These values are randomly generated. a: k1, b: k2, c: k3, d: k4, } } } /// A reference implementation of SeaHash. /// /// This is bloody slow when compared to the optimized version. This is because SeaHash was /// specifically designed to take all sorts of hardware and software hacks into account to achieve /// maximal performance, but this makes code significantly less readable. As such, this version has /// only one goal: to make the algorithm readable and understandable. pub fn hash(buf: &[u8]) -> u64 { hash_seeded( buf, 0x16f11fe89b0d677c, 0xb480a793d8e6c86c, 0x6fe2e5aaf078ebc9, 0x14f994a4c5259381, ) } /// The seeded version of the reference implementation. pub fn hash_seeded(buf: &[u8], k1: u64, k2: u64, k3: u64, k4: u64) -> u64 { // Initialize the state. let mut state = State::with_seeds(k1, k2, k3, k4); // Partition the rounded down buffer into chunks of 8 bytes, and iterate over them. The last // block might not be 8 bytes long. for int in buf.chunks(8) { // Read the chunk into an integer and write into the state. state.write_u64(read_int(int)); } // Finish the hash state and return the final value. state.finish(buf.len()) } #[cfg(test)] mod tests { use super::*; #[test] fn shakespear() { assert_eq!(hash(b"to be or not to be"), 1988685042348123509); } } seahash-4.1.0/src/stream.rs010064400017500001750000000233461400047463300137520ustar 00000000000000use std::hash::Hasher; use std::slice; use helper; /// The streaming version of the algorithm. #[derive(Clone, Copy)] pub struct SeaHasher { /// The state of the hasher. 
impl SeaHasher {
    /// Create a new `SeaHasher` with default state.
    ///
    /// Equivalent to `SeaHasher::default()`.
    pub fn new() -> SeaHasher {
        SeaHasher::default()
    }

    /// Construct a new `SeaHasher` given some seed.
    ///
    /// The four seeds become the four state words; the byte counter and the pending tail start
    /// out empty.
    ///
    /// For maximum quality, these seeds should be chosen at random.
    pub fn with_seeds(k1: u64, k2: u64, k3: u64, k4: u64) -> SeaHasher {
        SeaHasher {
            state: (k1, k2, k3, k4),
            written: 0,
            tail: 0,
            ntail: 0,
        }
    }

    /// Absorb one complete 64-bit block.
    ///
    /// The block is XORed into the first state word and diffused; the state words then rotate
    /// one position so the freshly diffused word ends up last. Counts 8 written bytes.
    #[inline(always)]
    fn push(&mut self, x: u64) {
        let a = helper::diffuse(self.state.0 ^ x);
        self.state.0 = self.state.1;
        self.state.1 = self.state.2;
        self.state.2 = self.state.3;
        self.state.3 = a;
        self.written += 8;
    }

    /// Absorb `bytes`, buffering any incomplete trailing block.
    ///
    /// Input first tops up the pending `tail`. Once the tail holds a full 8 bytes it is pushed,
    /// the remaining input is consumed 32 bytes at a time (one block per state word), and what
    /// is left over (at most 31 bytes) is absorbed as whole blocks, with a final 1-7 bytes kept
    /// in `tail`/`ntail` for a later call or for `finish`.
    #[inline(always)]
    fn push_bytes(&mut self, bytes: &[u8]) {
        // Number of input bytes that go into the pending tail; also the offset in `bytes` of
        // the first byte that is not part of the tail.
        let copied = core::cmp::min(8 - self.ntail, bytes.len());
        // SAFETY notes, as far as this block shows:
        // - `copied <= bytes.len()`, so the copy source holds at least `copied` bytes.
        // - `copied <= 8 - self.ntail`, so the copy stays inside the 8-byte local `this`.
        // - after the tail, `ptr` only advances over bytes that the length arithmetic below
        //   shows are still inside `bytes`.
        // NOTE(review): this relies on `self.ntail < 8` on entry (every assignment in this impl
        // keeps it in 0..=7) and on `helper::read_u64` reading exactly 8 bytes from its pointer
        // — confirm against `helper`.
        unsafe {
            // Little-endian view of the tail, so the new bytes land right after the buffered ones.
            let mut this = self.tail.to_le_bytes();
            let mut ptr = bytes.as_ptr();
            ptr.copy_to_nonoverlapping(this.as_mut_ptr().add(self.ntail), copied);
            // It will be at most 8
            if copied + self.ntail != 8 {
                // Still not a full block: keep buffering and wait for more input.
                self.ntail += copied;
                self.tail = u64::from_le_bytes(this);
            } else {
                // The tail is complete: absorb it and start over with an empty tail.
                self.push(u64::from_le_bytes(this));
                self.ntail = 0;
                self.tail = 0;

                // We've done the existing tail, now just do the rest in chunks of 4 x u64.
                ptr = ptr.offset(copied as isize);
                // `as` binds tighter than `&`: the remaining length is rounded down to a
                // multiple of 32 bytes.
                let end_ptr = ptr.offset((bytes.len() - copied) as isize & !0x1F);
                while end_ptr > ptr {
                    // Four `push` calls would rotate the state all the way around, so each word
                    // can be updated in place with its own block.
                    self.state.0 = helper::diffuse(self.state.0 ^ helper::read_u64(ptr));
                    self.state.1 = helper::diffuse(self.state.1 ^ helper::read_u64(ptr.offset(8)));
                    self.state.2 = helper::diffuse(self.state.2 ^ helper::read_u64(ptr.offset(16)));
                    self.state.3 = helper::diffuse(self.state.3 ^ helper::read_u64(ptr.offset(24)));
                    ptr = ptr.offset(32);
                    self.written += 32;
                }
                // Bytes left between `ptr` and the end of `bytes`; less than 32 after the loop.
                let mut excessive = bytes.len() + bytes.as_ptr() as usize - ptr as usize;
                match excessive {
                    0 => {
                        // input was a multiple of 4 x u64 bytes long; no new tail bytes.
                    }
                    1..=7 => {
                        // Less than one block left: it all becomes the new tail.
                        self.tail = helper::read_int(slice::from_raw_parts(ptr as *const u8, excessive));
                        self.ntail = excessive;
                        // self.written does not need to be updated as we only gathered self.tail
                        // bytes after larger chunks.
                    }
                    8 => {
                        self.push(helper::read_u64(ptr));
                        // self.written is updated by self.push
                    }
                    9..=15 => {
                        // One whole block, then a 1-7 byte tail.
                        self.push(helper::read_u64(ptr));
                        excessive -= 8;
                        self.tail = helper::read_int(slice::from_raw_parts(ptr.offset(8), excessive));
                        self.ntail = excessive;
                        // self.written is updated by self.push
                    }
                    16 => {
                        // Two whole blocks: same result as two `push` calls, done in one step.
                        let a = helper::diffuse(self.state.0 ^ helper::read_u64(ptr));
                        let b = helper::diffuse(self.state.1 ^ helper::read_u64(ptr.offset(8)));
                        // rotate
                        self.state.0 = self.state.2;
                        self.state.1 = self.state.3;
                        self.state.2 = a;
                        self.state.3 = b;
                        self.written += 16;
                    }
                    17..=23 => {
                        // Two whole blocks, then a 1-7 byte tail.
                        let a = helper::diffuse(self.state.0 ^ helper::read_u64(ptr));
                        let b = helper::diffuse(self.state.1 ^ helper::read_u64(ptr.offset(8)));
                        // rotate
                        self.state.0 = self.state.2;
                        self.state.1 = self.state.3;
                        self.state.2 = a;
                        self.state.3 = b;
                        excessive -= 16;
                        self.tail = helper::read_int(slice::from_raw_parts(ptr.offset(16), excessive));
                        self.ntail = excessive;
                        self.written += 16;
                    }
                    24 => {
                        // Three whole blocks: same result as three `push` calls.
                        let a = helper::diffuse(self.state.0 ^ helper::read_u64(ptr));
                        let b = helper::diffuse(self.state.1 ^ helper::read_u64(ptr.offset(8)));
                        let c = helper::diffuse(self.state.2 ^ helper::read_u64(ptr.offset(16)));
                        self.state.0 = self.state.3;
                        self.state.1 = a;
                        self.state.2 = b;
                        self.state.3 = c;
                        self.written += 24;
                    }
                    _ => {
                        // 25..=31 bytes left: three whole blocks, then a 1-7 byte tail.
                        let a = helper::diffuse(self.state.0 ^ helper::read_u64(ptr));
                        let b = helper::diffuse(self.state.1 ^ helper::read_u64(ptr.offset(8)));
                        let c = helper::diffuse(self.state.2 ^ helper::read_u64(ptr.offset(16)));
                        self.state.0 = self.state.3;
                        self.state.1 = a;
                        self.state.2 = b;
                        self.state.3 = c;
                        excessive -= 24;
                        self.tail = helper::read_int(slice::from_raw_parts(ptr.offset(24), excessive));
                        self.ntail = excessive;
                        self.written += 24;
                    }
                }
            }
        }
    }
}
/// The streaming hasher must agree with the one-shot buffer implementation when both start from
/// the same custom seeds.
#[test]
fn match_optimized() {
    const SEEDS: (u64, u64, u64, u64) = (
        0xe7b0c93ca8525013,
        0x011d02b854ae8182,
        0x7bcc5cf9c39cec76,
        0xfa336285d102d083,
    );

    // Eight 0xFF bytes followed by eight zero bytes: exactly two full 64-bit blocks.
    let input: &[u8] = &[
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00,
    ];

    let mut streaming = SeaHasher::with_seeds(SEEDS.0, SEEDS.1, SEEDS.2, SEEDS.3);
    Hasher::write(&mut streaming, input);
    let from_stream = streaming.finish();

    let from_buffer = hash_seeded(input, SEEDS.0, SEEDS.1, SEEDS.2, SEEDS.3);

    assert_eq!(from_buffer, from_stream)
}
c4.extend_from_slice(c1); c4.extend_from_slice(c2); c4.extend_from_slice(c3); let mut h2 = H::default(); h2.write(&c4); let hash2 = h2.finish(); let reference = seahash::reference::hash(&c4); let buffer = seahash::hash(&c4); println!("hash1: {:016x}", hash1); println!("hash2: {:016x}", hash2); println!("ref : {:016x}", reference); println!("buf : {:016x}", buffer); assert_eq!(hash1, hash2); assert_eq!(hash1, reference); assert_eq!(hash1, buffer); assert_eq!(hash1, 0xa06e72e1b06144a0); } #[test] fn test_different_chunk_sizes() { let v = { let c1: &[u8] = b"This hashing algorithm was extracted from the Rustc compiler."; let c2: &[u8] = b" This is the same hashing algoirthm used for some internal operations in FireFox."; let c3: &[u8] = b" The strength of this algorithm is in hashing 8 bytes at a time on 64-bit platforms, where the FNV algorithm works on one byte at a time."; [c1, c2, c3].concat() }; let mut h1 = H::default(); h1.write(&v); let h1 = h1.finish(); for chunk_len in 1..v.len() { let mut h2 = H::default(); for w in v.chunks(chunk_len) { h2.write(w); } let h2 = h2.finish(); assert_eq!(h1, h2, "failed with chunk_len={}", chunk_len); } } seahash-4.1.0/tests/quickchecks.rs010064400017500001750000000027721400047463300153270ustar 00000000000000extern crate seahash; #[macro_use] extern crate quickcheck; use quickcheck::TestResult; use seahash::hash; use seahash::reference::hash as reference; use seahash::SeaHasher; use std::hash::Hasher; use std::num::{NonZeroU8, NonZeroUsize}; quickcheck! { #[cfg_attr(miri, ignore)] // very slow to run on miri fn chunked_matches_buffered(xs: Vec, chunk_size: NonZeroUsize, times: NonZeroU8, additional: u8) -> TestResult { let target_size = xs.len() * times.get() as usize + additional as usize; if xs.is_empty() || target_size > 10_000_000 { TestResult::discard() } else { let xs = xs.into_iter() .cycle() // the vecs produced by quickcheck are perhaps a bit small by default. 
// additional should add some noise to avoid only getting nice even lengths. .take(target_size) .collect::>(); // write all at once let mut h0 = SeaHasher::default(); h0.write(&xs); let h0 = h0.finish(); // write in chunks let mut h1 = SeaHasher::default(); for chunk in xs.chunks(chunk_size.get()) { h1.write(chunk); } let h1 = h1.finish(); // compare all, including to buffered and reference let outcome = h0 == h1 && h0 == hash(&xs) && h0 == reference(&xs); TestResult::from_bool(outcome) } } }