murmurhash3-0.0.5/.gitignore000060000007650000024000000000241246321274200141670ustar0000000000000000/target /Cargo.lock murmurhash3-0.0.5/.travis.yml000066400007650000024000000005301255341752000143250ustar0000000000000000language: rust rust: - stable - beta - nightly matrix: include: - rust: nightly env: FEATURES=nightly BENCH=true script: - cargo build --verbose --features ${FEATURES:-""} - cargo test --verbose --features ${FEATURES:-""} - '[ "$BENCH" == "true" ] && cargo bench --no-run --verbose --features ${FEATURES:-""} || true' murmurhash3-0.0.5/Cargo.toml000060000007650000024000000014461255341504600141420ustar0000000000000000# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] name = "murmurhash3" version = "0.0.5" authors = ["mhallin "] description = "MurmurHash3 implementation" homepage = "https://github.com/mhallin/murmurhash3-rs" readme = "README.rst" license = "MIT" [lib] name = "murmurhash3" path = "src/lib.rs" [dev-dependencies.rand] version = "*" [features] nightly = [] murmurhash3-0.0.5/Cargo.toml.orig000060000007650000024000000005141255341504600150740ustar0000000000000000[package] name = "murmurhash3" version = "0.0.5" authors = ["mhallin "] description = "MurmurHash3 implementation" license = "MIT" readme = "README.rst" homepage = "https://github.com/mhallin/murmurhash3-rs" [lib] name = "murmurhash3" path = "src/lib.rs" [dev-dependencies] rand = "*" [features] nightly = [] murmurhash3-0.0.5/CHANGELOG.rst000064400007650000024000000011001250776627600142430ustar0000000000000000Change Log ========== Unreleased_ ---------- 0.0.4_ — 2014-04-04 ---------- * Enable ``HashState`` implementation 0.0.3_ — 2014-03-29 ------------------ * PR1_: Fixes to keep Rust Nightly compatibility, thanks polyfractal_ .. _Unreleased: https://github.com/mhallin/murmurhash3-rs/compare/v0.0.4...HEAD .. _0.0.4: https://github.com/mhallin/murmurhash3-rs/compare/v0.0.3...v0.0.4 .. _0.0.3: https://github.com/mhallin/murmurhash3-rs/compare/v0.0.2...v0.0.3 .. _PR1: https://github.com/mhallin/murmurhash3-rs/pull/1 .. _polyfractal: https://github.com/polyfractal murmurhash3-0.0.5/LICENSE000066400007650000024000000020701246337664200132330ustar0000000000000000The MIT License (MIT) Copyright (c) 2015 Magnus Hallin Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. murmurhash3-0.0.5/README.rst000066400007650000024000000024031250574536200137110ustar0000000000000000************** MurmurHash3.rs ************** .. image:: https://travis-ci.org/mhallin/murmurhash3-rs.svg?branch=master :target: https://travis-ci.org/mhallin/murmurhash3-rs A rust implementation of the MurmurHash3_. Both 32 bit and 128 bit versions are included. The 128 bit version is implemented with 64 bit datatypes, making it most suitable for x86_64 or other 64 bit architectures. ---- Usage ===== In your ``Cargo.toml``: .. code:: toml [dependencies] murmurhash3 = "*" Then you can start to use either ``murmurhash3_x86_32`` or ``murmurhash3_x64_128``: .. code:: rust use murmurhash3::murmurhash3_x64_128; fn hash_value() { let data = "test data"; let seed = 48221234; let hash = murmurhash3_x64_128(data.as_bytes(), seed); } Unfortunately, there is a bug in the ``HashState`` library implementation which prevents implementation of new ``Hasher`` implementations for use in for example ``HashMap``. Additionally, only the 32 bit hasher can be used there since ``HashMap`` uses a 64 bit hash internally. Tests ===== .. code:: cargo test Runs all tests with optimization level 3 in order to weed out potential problems with the optimizer. .. _MurmurHash3: https://code.google.com/p/smhasher/wiki/MurmurHash3 murmurhash3-0.0.5/src/hasher.rs000064400007650000024000000024351250776537300146420ustar0000000000000000use std::hash::Hasher; use std::collections::hash_state::HashState; use mmh3_32::murmurhash3_x86_32; pub struct Murmur3Hasher { seed: u32, bytes: Vec, } #[derive(Clone, Copy)] pub struct Murmur3HashState { seed: u32, } impl Murmur3HashState { pub fn new() -> Murmur3HashState { return Murmur3HashState { seed: 0 }; } pub fn with_seed(seed: u32) -> Murmur3HashState { return Murmur3HashState { seed: seed }; } } impl Hasher for Murmur3Hasher { fn finish(&self) -> u64 { return murmurhash3_x86_32(&self.bytes, self.seed) as u64; } fn write(&mut self, bytes: &[u8]) { self.bytes.push_all(bytes); } } impl HashState for Murmur3HashState { type Hasher = Murmur3Hasher; fn hasher(&self) -> Murmur3Hasher { return Murmur3Hasher { seed: self.seed, bytes: vec![] }; } } #[cfg(test)] mod test { use super::Murmur3HashState; use std::collections::hash_map::HashMap; #[test] fn use_in_hashmap() { let mut hashmap = HashMap::with_hash_state(Murmur3HashState::new()); hashmap.insert("one", 1); hashmap.insert("two", 2); assert!(hashmap.len() == 2); assert!(*hashmap.get("one").unwrap() == 1); assert!(*hashmap.get("two").unwrap() == 2); } } murmurhash3-0.0.5/src/lib.rs000064400007650000024000000005621255341473300141250ustar0000000000000000#![cfg_attr(feature = "nightly", feature(hashmap_hasher))] #![cfg_attr(feature = "nightly", feature(test))] #![cfg_attr(feature = "nightly", feature(vec_push_all))] mod mmh3_128; mod mmh3_32; #[cfg(feature="nightly")] mod hasher; pub use mmh3_128::murmurhash3_x64_128; pub use mmh3_32::murmurhash3_x86_32; #[cfg(feature="nightly")] pub use hasher::Murmur3HashState; murmurhash3-0.0.5/src/mmh3_128.rs000064400007650000024000000146401255341502600146130ustar0000000000000000use std::mem; fn fmix64(mut k: u64) -> u64 { k ^= k >> 33; k = k.wrapping_mul(0xff51afd7ed558ccdu64); k ^= k >> 33; k = k.wrapping_mul(0xc4ceb9fe1a85ec53u64); k ^= k >> 33; return k; } fn get_128_block(bytes: &[u8], index: usize) -> (u64, u64) { let b64: &[u64] = unsafe { mem::transmute(bytes) }; return (b64[index], b64[index + 1]); } pub fn murmurhash3_x64_128(bytes: &[u8], seed: u64) -> (u64, u64) { let c1 = 0x87c37b91114253d5u64; let c2 = 0x4cf5ad432745937fu64; let read_size = 16; let len = bytes.len() as u64; let block_count = len / read_size; let (mut h1, mut h2) = (seed, seed); for i in 0..block_count as usize { let (mut k1, mut k2) = get_128_block(bytes, i * 2); k1 = k1.wrapping_mul(c1); k1 = k1.rotate_left(31); k1 = k1.wrapping_mul(c2); h1 ^= k1; h1 = h1.rotate_left(27); h1 = h1.wrapping_add(h2); h1 = h1.wrapping_mul(5); h1 = h1.wrapping_add(0x52dce729); k2 = k2.wrapping_mul(c2); k2 = k2.rotate_left(33); k2 = k2.wrapping_mul(c1); h2 ^= k2; h2 = h2.rotate_left(31); h2 = h2.wrapping_add(h1); h2 = h2.wrapping_mul(5); h2 = h2.wrapping_add(0x38495ab5); } let (mut k1, mut k2) = (0u64, 0u64); if len & 15 == 15 { k2 ^= (bytes[(block_count * read_size) as usize + 14] as u64) << 48; } if len & 15 >= 14 { k2 ^= (bytes[(block_count * read_size) as usize + 13] as u64) << 40; } if len & 15 >= 13 { k2 ^= (bytes[(block_count * read_size) as usize + 12] as u64) << 32; } if len & 15 >= 12 { k2 ^= (bytes[(block_count * read_size) as usize + 11] as u64) << 24; } if len & 15 >= 11 { k2 ^= (bytes[(block_count * read_size) as usize + 10] as u64) << 16; } if len & 15 >= 10 { k2 ^= (bytes[(block_count * read_size) as usize + 9] as u64) << 8; } if len & 15 >= 9 { k2 ^= bytes[(block_count * read_size) as usize + 8] as u64; k2 = k2.wrapping_mul(c2); k2 = k2.rotate_left(33); k2 = k2.wrapping_mul(c1); h2 ^= k2; } if len & 15 >= 8 { k1 ^= (bytes[(block_count * read_size) as usize + 7] as u64) << 56; } if len & 15 >= 7 { k1 ^= (bytes[(block_count * read_size) as usize + 6] as u64) << 48; } if len & 15 >= 6 { k1 ^= (bytes[(block_count * read_size) as usize + 5] as u64) << 40; } if len & 15 >= 5 { k1 ^= (bytes[(block_count * read_size) as usize + 4] as u64) << 32; } if len & 15 >= 4 { k1 ^= (bytes[(block_count * read_size) as usize + 3] as u64) << 24; } if len & 15 >= 3 { k1 ^= (bytes[(block_count * read_size) as usize + 2] as u64) << 16; } if len & 15 >= 2 { k1 ^= (bytes[(block_count * read_size) as usize + 1] as u64) << 8; } if len & 15 >= 1 { k1 ^= bytes[(block_count * read_size) as usize + 0] as u64; k1 = k1.wrapping_mul(c1); k1 = k1.rotate_left(31); k1 = k1.wrapping_mul(c2); h1 ^= k1; } h1 ^= bytes.len() as u64; h2 ^= bytes.len() as u64; h1 = h1.wrapping_add(h2); h2 = h2.wrapping_add(h1); h1 = fmix64(h1); h2 = fmix64(h2); h1 = h1.wrapping_add(h2); h2 = h2.wrapping_add(h1); return (h1, h2); } #[cfg(test)] mod test { use super::murmurhash3_x64_128; #[test] fn test_empty_string() { assert!(murmurhash3_x64_128("".as_bytes(), 0) == (0, 0)); } #[test] fn test_tail_lengths() { assert!(murmurhash3_x64_128("1".as_bytes(), 0) == (8213365047359667313, 10676604921780958775)); assert!(murmurhash3_x64_128("12".as_bytes(), 0) == (5355690773644049813, 9855895140584599837)); assert!(murmurhash3_x64_128("123".as_bytes(), 0) == (10978418110857903978, 4791445053355511657)); assert!(murmurhash3_x64_128("1234".as_bytes(), 0) == (619023178690193332, 3755592904005385637)); assert!(murmurhash3_x64_128("12345".as_bytes(), 0) == (2375712675693977547, 17382870096830835188)); assert!(murmurhash3_x64_128("123456".as_bytes(), 0) == (16435832985690558678, 5882968373513761278)); assert!(murmurhash3_x64_128("1234567".as_bytes(), 0) == (3232113351312417698, 4025181827808483669)); assert!(murmurhash3_x64_128("12345678".as_bytes(), 0) == (4272337174398058908, 10464973996478965079)); assert!(murmurhash3_x64_128("123456789".as_bytes(), 0) == (4360720697772133540, 11094893415607738629)); assert!(murmurhash3_x64_128("123456789a".as_bytes(), 0) == (12594836289594257748, 2662019112679848245)); assert!(murmurhash3_x64_128("123456789ab".as_bytes(), 0) == (6978636991469537545, 12243090730442643750)); assert!(murmurhash3_x64_128("123456789abc".as_bytes(), 0) == (211890993682310078, 16480638721813329343)); assert!(murmurhash3_x64_128("123456789abcd".as_bytes(), 0) == (12459781455342427559, 3193214493011213179)); assert!(murmurhash3_x64_128("123456789abcde".as_bytes(), 0) == (12538342858731408721, 9820739847336455216)); assert!(murmurhash3_x64_128("123456789abcdef".as_bytes(), 0) == (9165946068217512774, 2451472574052603025)); assert!(murmurhash3_x64_128("123456789abcdef1".as_bytes(), 0) == (9259082041050667785, 12459473952842597282)); } #[test] fn test_large_data() { assert!(murmurhash3_x64_128("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Etiam at consequat massa. Cras eleifend pellentesque ex, at dignissim libero maximus ut. Sed eget nulla felis".as_bytes(), 0) == (9455322759164802692, 17863277201603478371)); } #[cfg(feature="nightly")] mod bench { extern crate rand; extern crate test; use std::iter::FromIterator; use self::rand::Rng; use self::test::{Bencher, black_box}; use super::super::murmurhash3_x64_128; fn run_bench(b: &mut Bencher, size: u64) { let mut data: Vec = FromIterator::from_iter((0..size).map(|_| 0u8)); rand::thread_rng().fill_bytes(&mut data); b.bytes = size; b.iter(|| { black_box(murmurhash3_x64_128(&data, 0)); }); } #[bench] fn bench_random_256k(b: &mut Bencher) { run_bench(b, 256 * 1024); } #[bench] fn bench_random_16b(b: &mut Bencher) { run_bench(b, 16); } } } murmurhash3-0.0.5/src/mmh3_32.rs000064400007650000024000000057201255341503300145220ustar0000000000000000use std::mem; fn fmix32(mut h: u32) -> u32 { h ^= h >> 16; h = h.wrapping_mul(0x85ebca6b); h ^= h >> 13; h = h.wrapping_mul(0xc2b2ae35); h ^= h >> 16; return h; } fn get_32_block(bytes: &[u8], index: usize) -> u32 { let b32: &[u32] = unsafe { mem::transmute(bytes) }; return b32[index]; } pub fn murmurhash3_x86_32(bytes: &[u8], seed: u32) -> u32 { let c1 = 0xcc9e2d51u32; let c2 = 0x1b873593u32; let read_size = 4; let len = bytes.len() as u32; let block_count = len / read_size; let mut h1 = seed; for i in 0..block_count as usize { let mut k1 = get_32_block(bytes, i); k1 = k1.wrapping_mul(c1); k1 = k1.rotate_left(15); k1 = k1.wrapping_mul(c2); h1 ^= k1; h1 = h1.rotate_left(13); h1 = h1.wrapping_mul(5); h1 = h1.wrapping_add(0xe6546b64) } let mut k1 = 0u32; if len & 3 == 3 { k1 ^= (bytes[(block_count * read_size) as usize + 2] as u32) << 16; } if len & 3 >= 2 { k1 ^= (bytes[(block_count * read_size) as usize + 1] as u32) << 8; } if len & 3 >= 1 { k1 ^= bytes[(block_count * read_size) as usize + 0] as u32; k1 = k1.wrapping_mul(c1); k1 = k1.rotate_left(15); k1 = k1.wrapping_mul(c2); h1 ^= k1; } h1 ^= bytes.len() as u32; h1 = fmix32(h1); return h1; } #[cfg(test)] mod test { use super::murmurhash3_x86_32; #[test] fn test_empty_string() { assert!(murmurhash3_x86_32("".as_bytes(), 0) == 0); } #[test] fn test_tail_lengths() { assert!(murmurhash3_x86_32("1".as_bytes(), 0) == 2484513939); assert!(murmurhash3_x86_32("12".as_bytes(), 0) == 4191350549); assert!(murmurhash3_x86_32("123".as_bytes(), 0) == 2662625771); assert!(murmurhash3_x86_32("1234".as_bytes(), 0) == 1914461635); } #[test] fn test_large_data() { assert!(murmurhash3_x86_32("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Etiam at consequat massa. Cras eleifend pellentesque ex, at dignissim libero maximus ut. Sed eget nulla felis".as_bytes(), 0) == 1004899618); } #[cfg(feature="nightly")] mod bench { extern crate rand; extern crate test; use std::iter::FromIterator; use self::rand::Rng; use self::test::{Bencher, black_box}; use super::super::murmurhash3_x86_32; fn run_bench(b: &mut Bencher, size: u64) { let mut data: Vec = FromIterator::from_iter((0..size).map(|_| 0u8)); rand::thread_rng().fill_bytes(&mut data); b.bytes = size; b.iter(|| { black_box(murmurhash3_x86_32(&data, 0)); }); } #[bench] fn bench_random_256k(b: &mut Bencher) { run_bench(b, 256 * 1024); } #[bench] fn bench_random_16b(b: &mut Bencher) { run_bench(b, 16); } } }