pax_global_header00006660000000000000000000000064144314324700014514gustar00rootroot0000000000000052 comment=8b7c9bf3ed1b8f83e5265c84a2cad5c1d6bb1d69 libmurmurhash-1.6/000077500000000000000000000000001443143247000142445ustar00rootroot00000000000000libmurmurhash-1.6/.gitignore000066400000000000000000000004701443143247000162350ustar00rootroot00000000000000*.o *.a *.so.* *.so mmh mmh_dyn mmh_old mmh_ref # autotools .deps/ .libs/ Makefile Makefile.in aclocal.m4 autom4te.cache/ config.h config.h.in config.h.in~ config.log config.status config/ configure libmurmurhash.la libmurmurhash.pc libmurmurhash_la-PMurHash.lo libmurmurhash_la-murmurhash.lo libtool stamp-h1 libmurmurhash-1.6/Makefile.am000066400000000000000000000027121443143247000163020ustar00rootroot00000000000000# AutoMake Me dist_noinst_DATA = NOLICENSE Readme.md lib_LTLIBRARIES = libmurmurhash.la libmurmurhashdir=$(includedir) libmurmurhash_HEADERS= murmurhash.h libmurmurhash_la_SOURCES = PMurHash.c murmurhash.c PMurHash.h libmurmurhash_la_LDFLAGS = -version-info @LIB_VERSION@ libmurmurhash_la_CPPFLAGS = $(INCLUDES) dist_man_MANS = murmurhash.3 pkgconfigdir=$(libdir)/pkgconfig pkgconfig_DATA = libmurmurhash.pc ## checks check_PROGRAMS=mmh mmh_old EXTRA_PROGRAMS=mmh_dyn mmh_ref # new interface, static linking mmh_SOURCES = test/mmh.c mmh_LDADD = libmurmurhash.la # new interface, dynamic linking mmh_dyn_SOURCES = test/mmh.c mmh_dyn_LDADD = -lmurmurhash # old interface, static linking mmh_old_SOURCES = test/mmh_old.c mmh_old_LDADD = libmurmurhash.la # old interface, reference implementation mmh_ref_SOURCES = test/mmh_old.c test/MurmurHash3.c test/MurmurHash3.h dist_noinst_DATA += test/almostempty test/almostempty.hash check: mmh mmh_old ./mmh $(top_srcdir)/test/almostempty | diff $(top_srcdir)/test/almostempty.hash - ./mmh_old $(top_srcdir)/test/almostempty | diff $(top_srcdir)/test/almostempty.hash - # Only execute this on x86 or x86_64 check-ref: mmh_ref ./mmh_ref $(top_srcdir)/test/almostempty | diff 
$(top_srcdir)/test/almostempty.hash - # should be checked after installation! check-dynamic: mmh_dyn ./mmh_dyn $(top_srcdir)/test/almostempty | diff $(top_srcdir)/test/almostempty.hash - ## misc format: clang-format -i *.c *.h test/*.c test/*.h libmurmurhash-1.6/NOLICENSE000066400000000000000000000004331443143247000155060ustar00rootroot00000000000000libmurmurhash was written by Fabian Klötzl, and is placed in the public domain. The author hereby disclaims copyright to this source code. MurmurHash3 was written by Austin Appleby, and is placed in the public domain. PMurHash was written by Shane Day, and is also public domain. libmurmurhash-1.6/PMurHash.c000066400000000000000000000272401443143247000161040ustar00rootroot00000000000000/*----------------------------------------------------------------------------- * MurmurHash3 was written by Austin Appleby, and is placed in the public * domain. * * This implementation was written by Shane Day, and is also public domain. * * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A) * with support for progressive processing. */ /*----------------------------------------------------------------------------- If you want to understand the MurmurHash algorithm you would be much better off reading the original source. Just point your browser at: http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp What this version provides? 1. Progressive data feeding. Useful when the entire payload to be hashed does not fit in memory or when the data is streamed through the application. Also useful when hashing a number of strings with a common prefix. A partial hash of a prefix string can be generated and reused for each suffix string. 2. Portability. Plain old C so that it should compile on any old compiler. Both CPU endian and access-alignment neutral, but avoiding inefficient code when possible depending on CPU capabilities. 3. Drop in. 
I personally like nice self contained public domain code, making it easy to pilfer without loads of refactoring to work properly in the existing application code & makefile structure and mucking around with licence files. Just copy PMurHash.h and PMurHash.c and you're ready to go. How does it work? We can only process entire 32 bit chunks of input, except for the very end that may be shorter. So along with the partial hash we need to give back to the caller a carry containing up to 3 bytes that we were unable to process. This carry also needs to record the number of bytes the carry holds. I use the low 2 bits as a count (0..3) and the carry bytes are shifted into the high byte in stream order. To handle endianess I simply use a macro that reads a uint32_t and define that macro to be a direct read on little endian machines, a read and swap on big endian machines, or a byte-by-byte read if the endianess is unknown. Addendum 2019-01-31: Byte order detection was somewhat out of order. I added macros as suggested by recent GCC documentation. (Fabian Klötzl) -----------------------------------------------------------------------------*/ #include "PMurHash.h" /* I used ugly type names in the header to avoid potential conflicts with * application or system typedefs & defines. Since I'm not including any more * headers below here I can rename these so that the code reads like C99 */ #undef uint32_t #define uint32_t MH_UINT32 #undef uint8_t #define uint8_t MH_UINT8 /* MSVC warnings we choose to ignore */ #if defined(_MSC_VER) #pragma warning(disable : 4127) /* conditional expression is constant */ #endif /*----------------------------------------------------------------------------- * Endianess, misalignment capabilities and util macros * * The following 3 macros are defined in this section. The other macros defined * are only needed to help derive these 3. 
* * READ_UINT32(x) Read a little endian unsigned 32-bit int * UNALIGNED_SAFE Defined if READ_UINT32 works on non-word boundaries * ROTL32(x,r) Rotate x left by r bits */ #if defined(__ORDER_LITTLE_ENDIAN__) #define __LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__ #endif #if defined(__ORDER_BIG_ENDIAN__) #define __BIG_ENDIAN __ORDER_BIG_ENDIAN__ #endif #if defined(__BYTE_ORDER__) #define __BYTE_ORDER __BYTE_ORDER__ #endif /* Convention is to define __BYTE_ORDER == to one of these values */ #if !defined(__BIG_ENDIAN) #define __BIG_ENDIAN 4321 #endif #if !defined(__LITTLE_ENDIAN) #define __LITTLE_ENDIAN 1234 #endif /* I386 */ #if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(i386) #define __BYTE_ORDER __LITTLE_ENDIAN #define UNALIGNED_SAFE #endif /* gcc 'may' define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ to 1 (Note the trailing * __), or even _LITTLE_ENDIAN or _BIG_ENDIAN (Note the single _ prefix) */ #if !defined(__BYTE_ORDER) #if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__ == 1 || \ defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN == 1 #define __BYTE_ORDER __LITTLE_ENDIAN #elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__ == 1 || \ defined(_BIG_ENDIAN) && _BIG_ENDIAN == 1 #define __BYTE_ORDER __BIG_ENDIAN #endif #endif /* gcc (usually) defines xEL/EB macros for ARM and MIPS endianess */ #if !defined(__BYTE_ORDER) #if defined(__ARMEL__) || defined(__MIPSEL__) #define __BYTE_ORDER __LITTLE_ENDIAN #endif #if defined(__ARMEB__) || defined(__MIPSEB__) #define __BYTE_ORDER __BIG_ENDIAN #endif #endif /* Now find best way we can to READ_UINT32 */ #if __BYTE_ORDER == __LITTLE_ENDIAN /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */ #define READ_UINT32(ptr) (*((uint32_t *)(ptr))) #elif __BYTE_ORDER == __BIG_ENDIAN /* TODO: Add additional cases below where a compiler provided bswap32 is * available */ #if defined(__GNUC__) && \ (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) #define READ_UINT32(ptr) (__builtin_bswap32(*((uint32_t 
*)(ptr)))) #else /* Without a known fast bswap32 we're just as well off doing this */ #define READ_UINT32(ptr) (ptr[0] | ptr[1] << 8 | ptr[2] << 16 | ptr[3] << 24) #define UNALIGNED_SAFE #endif #else /* Unknown endianess so last resort is to read individual bytes */ #define READ_UINT32(ptr) (ptr[0] | ptr[1] << 8 | ptr[2] << 16 | ptr[3] << 24) /* Since we're not doing word-reads we can skip the messing about with * realignment */ #define UNALIGNED_SAFE #endif /* Find best way to ROTL32 */ #if defined(_MSC_VER) #include /* Microsoft put _rotl declaration in here */ #define ROTL32(x, r) _rotl(x, r) #else /* gcc recognises this code and generates a rotate instruction for CPUs with one */ #define ROTL32(x, r) (((uint32_t)x << r) | ((uint32_t)x >> (32 - r))) #endif /*----------------------------------------------------------------------------- * Core murmurhash algorithm macros */ #define C1 (0xcc9e2d51) #define C2 (0x1b873593) /* This is the main processing body of the algorithm. It operates * on each full 32-bits of input. */ #define DOBLOCK(h1, k1) \ do { \ k1 *= C1; \ k1 = ROTL32(k1, 15); \ k1 *= C2; \ \ h1 ^= k1; \ h1 = ROTL32(h1, 13); \ h1 = h1 * 5 + 0xe6546b64; \ } while (0) /* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */ /* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, * ptr/len=payload */ #define DOBYTES(cnt, h1, c, n, ptr, len) \ do { \ int _i = cnt; \ while (_i--) { \ c = c >> 8 | *ptr++ << 24; \ n++; \ len--; \ if (n == 4) { \ DOBLOCK(h1, c); \ n = 0; \ } \ } \ } while (0) /*---------------------------------------------------------------------------*/ /* Main hashing function. Initialise carry to 0 and h1 to 0 or an initial seed * if wanted. Both ph1 and pcarry are required arguments. 
*/ void PMurHash32_Process(uint32_t *ph1, uint32_t *pcarry, const void *key, int len) { uint32_t h1 = *ph1; uint32_t c = *pcarry; const uint8_t *ptr = (uint8_t *)key; const uint8_t *end; /* Extract carry count from low 2 bits of c value */ int n = c & 3; #if defined(UNALIGNED_SAFE) /* This CPU handles unaligned word access */ /* Consume any carry bytes */ int i = (4 - n) & 3; if (i && i <= len) { DOBYTES(i, h1, c, n, ptr, len); } /* Process 32-bit chunks */ end = ptr + len / 4 * 4; for (; ptr < end; ptr += 4) { uint32_t k1 = READ_UINT32(ptr); DOBLOCK(h1, k1); } #else /*UNALIGNED_SAFE*/ /* This CPU does not handle unaligned word access */ /* Consume enough so that the next data byte is word aligned */ int i = -(long)ptr & 3; if (i && i <= len) { DOBYTES(i, h1, c, n, ptr, len); } /* We're now aligned. Process in aligned blocks. Specialise for each * possible carry count */ end = ptr + len / 4 * 4; switch (n) { /* how many bytes in c */ case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */ for (; ptr < end; ptr += 4) { uint32_t k1 = READ_UINT32(ptr); DOBLOCK(h1, k1); } break; case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */ for (; ptr < end; ptr += 4) { uint32_t k1 = c >> 24; c = READ_UINT32(ptr); k1 |= c << 8; DOBLOCK(h1, k1); } break; case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */ for (; ptr < end; ptr += 4) { uint32_t k1 = c >> 16; c = READ_UINT32(ptr); k1 |= c << 16; DOBLOCK(h1, k1); } break; case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */ for (; ptr < end; ptr += 4) { uint32_t k1 = c >> 8; c = READ_UINT32(ptr); k1 |= c << 24; DOBLOCK(h1, k1); } } #endif /*UNALIGNED_SAFE*/ /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */ len -= len / 4 * 4; /* Append any remaining bytes into carry */ DOBYTES(len, h1, c, n, ptr, len); /* Copy out new running hash and carry */ *ph1 = h1; *pcarry = (c & ~0xff) | n; } /*---------------------------------------------------------------------------*/ /* Finalize a hash. 
To match the original Murmur3A the total_length must be * provided */ uint32_t PMurHash32_Result(uint32_t h, uint32_t carry, uint32_t total_length) { uint32_t k1; int n = carry & 3; if (n) { k1 = carry >> (4 - n) * 8; k1 *= C1; k1 = ROTL32(k1, 15); k1 *= C2; h ^= k1; } h ^= total_length; /* fmix */ h ^= h >> 16; h *= 0x85ebca6b; h ^= h >> 13; h *= 0xc2b2ae35; h ^= h >> 16; return h; } /*---------------------------------------------------------------------------*/ /* Murmur3A compatable all-at-once */ uint32_t PMurHash32(uint32_t seed, const void *key, int len) { uint32_t h1 = seed, carry = 0; PMurHash32_Process(&h1, &carry, key, len); return PMurHash32_Result(h1, carry, len); } /*---------------------------------------------------------------------------*/ /* Provide an API suitable for smhasher */ void PMurHash32_test(const void *key, int len, uint32_t seed, void *out) { uint32_t h1 = seed, carry = 0; const uint8_t *ptr = (uint8_t *)key; const uint8_t *end = ptr + len; #if 0 /* Exercise the progressive processing */ while(ptr < end) { //const uint8_t *mid = ptr + rand()%(end-ptr)+1; const uint8_t *mid = ptr + (rand()&0xF); mid = mid= 199901L) #include #define MH_UINT32 uint32_t #endif /* Otherwise try testing against max value macros from limit.h */ #if !defined(MH_UINT32) #include #if (USHRT_MAX == 0xffffffffUL) #define MH_UINT32 unsigned short #elif (UINT_MAX == 0xffffffffUL) #define MH_UINT32 unsigned int #elif (ULONG_MAX == 0xffffffffUL) #define MH_UINT32 unsigned long #endif #endif #if !defined(MH_UINT32) #error Unable to determine type name for unsigned 32-bit int #endif /* I'm yet to work on a platform where 'unsigned char' is not 8 bits */ #define MH_UINT8 unsigned char /* ------------------------------------------------------------------------- */ /* Prototypes */ #ifdef __cplusplus extern "C" { #endif #pragma GCC visibility push(hidden) void PMurHash32_Process(MH_UINT32 *ph1, MH_UINT32 *pcarry, const void *key, int len); MH_UINT32 
PMurHash32_Result(MH_UINT32 h1, MH_UINT32 carry, MH_UINT32 total_length); MH_UINT32 PMurHash32(MH_UINT32 seed, const void *key, int len); void PMurHash32_test(const void *key, int len, MH_UINT32 seed, void *out); #pragma GCC visibility pop #ifdef __cplusplus } #endif libmurmurhash-1.6/Readme.md000066400000000000000000000034741443143247000157730ustar00rootroot00000000000000# LibMurmurHash MurmurHash is one of the most commonly used non-cryptographic hash functions. This project bundles it into a shared library to avoid code duplication. Further it provides compatibility even for big-endian systems. ## Compiling and Installing libmurmurhash uses the autotools as the build system. When building from a release tarball they are optional as is the first step of the following. autoreconf -fi # optional ./configure make make check make install ## API LibMurmurHash provides the three variants of MurmurHash3 from the [original source](https://github.com/aappleby/smhasher). The first parameter specifies the beginning of the data to be hashed in memory. The second parameter gives the number of bytes to be hashed. The third parameter can be used to compute an alternative hash on the same data. Pass the location of the hash via the output parameter. ```C #include void lmmh_x86_32(const void *addr, unsigned int len, uint32_t seed, uint32_t out[1]); void lmmh_x86_128(const void *addr, unsigned int len, uint32_t seed, uint32_t out[4]); void lmmh_x64_128(const void *addr, unsigned int len, uint32_t seed, uint64_t out[2]); ``` For compatibility the original API is still supported but *deprecated*. It has two problems; The given length could be negative; The out parameter has some peculiar alignment requirements. 
```C void MurmurHash3_x86_32(const void *data, int len, uint32_t seed, void *out); void MurmurHash3_x86_128(const void *data, int len, uint32_t seed, void *out); void MurmurHash3_x64_128(const void *data, int len, uint32_t seed, void *out); ``` ## License No license—all code is public domain. I appreciate credit, though. ## Contact Please file a bug on GitHub or send me a mail . libmurmurhash-1.6/configure.ac000066400000000000000000000017641443143247000165420ustar00rootroot00000000000000AC_INIT(libmurmurhash, 1.6, fabian-libmurmurhash@kloetzl.info) AC_CONFIG_SRCDIR([murmurhash.h]) AC_CONFIG_HEADERS([config.h]) AC_PREREQ(2.57) # Directory that contains install-sh and other auxiliary files AC_CONFIG_AUX_DIR([config]) AC_CONFIG_MACRO_DIR(config) AM_INIT_AUTOMAKE([1.6 foreign dist-zip tar-ustar filename-length-max=299]) LIB_VERSION=2:0:0 AC_SUBST([VERSION]) AC_SUBST([LIB_VERSION]) AC_SUBST([VERSION]) # Checks for programs. AC_PROG_CC AC_PROG_LN_S AC_PROG_INSTALL AC_PROG_LIBTOOL CPPFLAGS="-I\$(top_srcdir) $CPPFLAGS" # Checks for libraries. # Checks for header files. AC_HEADER_STDC AC_CHECK_HEADERS([fcntl.h inttypes.h limits.h stddef.h stdint.h stdlib.h string.h unistd.h]) # Checks for typedefs, structures, and compiler characteristics. AC_C_INLINE AC_TYPE_INT8_T AC_TYPE_SIZE_T AC_TYPE_UINT32_T AC_TYPE_UINT64_T AC_TYPE_UINT8_T # Checks for library functions. 
AC_FUNC_MMAP AC_CHECK_FUNCS([munmap]) AC_PROG_MAKE_SET AC_CONFIG_FILES([ Makefile libmurmurhash.pc ]) AC_OUTPUT libmurmurhash-1.6/libmurmurhash.pc.in000066400000000000000000000003551443143247000200620ustar00rootroot00000000000000prefix=@prefix@ exec_prefix=${prefix} libdir=${exec_prefix}/lib includedir=${prefix}/include Name: libmurmurhash Version: @VERSION@ Description: Portable MurmurHash Implementation Libs: -L${libdir} -lmurmurhash Cflags: -I${includedir} libmurmurhash-1.6/murmurhash.3000066400000000000000000000041661443143247000165320ustar00rootroot00000000000000.TH LIBMURMURHASH "3" "2019-02-04" "1" "libmurmurhash manual" .SH NAME libmurmurhash \- non-cryptographic hash functions .SH SYNOPSIS .nf .B #include .PP .BI "void lmmh_x86_32(const void *" addr ", unsigned int " len ", uint32_t " seed ", uint32_t " out "[1]); .BI "void lmmh_x86_128(const void *" addr ", unsigned int " len ", uint32_t " seed ", uint32_t " out "[4]); .BI "void lmmh_x64_128(const void *" addr ", unsigned int " len ", uint32_t " seed ", uint64_t " out "[2]); .SS Deprecated API .nf .BI "void MurmurHash3_x86_32(const void *" data ", int " len ", uint32_t " seed ", void *" out "); .BI "void MurmurHash3_x86_128(const void *" data ", int " len ", uint32_t " seed ", void *" out "); .BI "void MurmurHash3_x64_128(const void *" data ", int " len ", uint32_t " seed ", void *" out "); .fi .SH DESCRIPTION .TP LibMurmurHash provides the three variants of MurmurHash3 from the original source. The first parameter \fIaddr\fR specifies the beginning of the data to be hashed in memory. The second parameter \fIlen\fR gives the number of bytes to be hashed. The third parameter \fIseed\fR can be used to compute an alternative hash on the same data. Pass the location of the hash via the \fIout\fR parameter. .SH EXAMPLE .TP Compute the hash of \fIlength\fR bytes starting from \fIdata\fR and print the 32-bit sized hash value. 
.PP .in +4n .EX uint32_t hash; lmmh_x86_32(data, length, 0, &hash); printf("%" PRIx32 "", hash); .PP Do the same, but with a wider hash. .PP .in +4n .EX uint64_t widehash[2]; lmmh_x64_128(data, length, 0, widehash); printf("%" PRIx64 "%" PRIx64 "", widehash[0], widehash[1]); .SH AUTHORS MurmurHash was created by Austin Appleby. PMurHash was written by Shane Day. Fabian Klötzl created libmurmurhash. All code is dedicated to the public domain. .SH BUGS The old API is problematic in that a negative length remains unchecked and the output parameter has very specific alignment requirements. Furthermore, the order of bytes in the hash may differ across architectures. .SS Reporting Bugs Please file a bug on GitHub or send me a mail . .SS libmurmurhash-1.6/murmurhash.c000066400000000000000000000156121443143247000166100ustar00rootroot00000000000000//----------------------------------------------------------------------------- // libmurmurhash was written by Fabian Klötzl, and is placed in the public // domain. The author hereby disclaims copyright to this source code. 
#include "murmurhash.h" #include "PMurHash.h" #include #include #include #include void lmmh_x86_32(const void *addr, unsigned int len, MH_UINT32 seed, uint32_t out[1]) { if (len < (unsigned int)INT_MAX) { MH_UINT32 hash = PMurHash32(seed, addr, len); memcpy(out, &hash, sizeof(hash)); } else { uint32_t h1 = seed, carry = 0; PMurHash32_Process(&h1, &carry, addr, INT_MAX); PMurHash32_Process(&h1, &carry, addr + INT_MAX, len - INT_MAX); MH_UINT32 hash = PMurHash32_Result(h1, carry, len); memcpy(out, &hash, sizeof(hash)); } } #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define READ_UINT32(ptr) (*((uint32_t *)(ptr))) #define READ_UINT64(ptr) (*((uint64_t *)(ptr))) #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define READ_UINT32(ptr) (__builtin_bswap32(*((uint32_t *)(ptr)))) #define READ_UINT64(ptr) (__builtin_bswap64(*((uint64_t *)(ptr)))) #else #error "weird byte order" #endif /* Please don't send me emails about how this is undefined behaviour. */ #define ROTL32(x, r) (((uint32_t)x << r) | ((uint32_t)x >> (32 - r))) #define ROTL64(x, r) (((uint64_t)x << r) | ((uint64_t)x >> (64 - r))) static uint32_t getblock32(const unsigned char *addr, int offset) { unsigned char data[sizeof(uint32_t)]; memcpy(&data, addr + offset * sizeof(uint32_t), sizeof(uint32_t)); return READ_UINT32(data); } static uint32_t fmix32(uint32_t h) { h ^= h >> 16; h *= 0x85ebca6b; h ^= h >> 13; h *= 0xc2b2ae35; h ^= h >> 16; return h; } void lmmh_x86_128(const void *addr, unsigned int len, uint32_t seed, uint32_t out[4]) { const uint8_t *data = (const uint8_t *)addr; const int nblocks = len / 16; uint32_t h1 = seed; uint32_t h2 = seed; uint32_t h3 = seed; uint32_t h4 = seed; const uint32_t c1 = 0x239b961b; const uint32_t c2 = 0xab0e9789; const uint32_t c3 = 0x38b34ae5; const uint32_t c4 = 0xa1e38b93; //---------- // body const uint8_t *ptr = data + nblocks * 16; for (int i = -nblocks; i; i++) { uint32_t k1 = getblock32(ptr, i * 4 + 0); uint32_t k2 = getblock32(ptr, i * 4 + 1); uint32_t k3 = 
getblock32(ptr, i * 4 + 2); uint32_t k4 = getblock32(ptr, i * 4 + 3); k1 *= c1; k1 = ROTL32(k1, 15); k1 *= c2; h1 ^= k1; h1 = ROTL32(h1, 19); h1 += h2; h1 = h1 * 5 + 0x561ccd1b; k2 *= c2; k2 = ROTL32(k2, 16); k2 *= c3; h2 ^= k2; h2 = ROTL32(h2, 17); h2 += h3; h2 = h2 * 5 + 0x0bcaa747; k3 *= c3; k3 = ROTL32(k3, 17); k3 *= c4; h3 ^= k3; h3 = ROTL32(h3, 15); h3 += h4; h3 = h3 * 5 + 0x96cd1c35; k4 *= c4; k4 = ROTL32(k4, 18); k4 *= c1; h4 ^= k4; h4 = ROTL32(h4, 13); h4 += h1; h4 = h4 * 5 + 0x32ac3b17; } //---------- // tail const uint8_t *tail = (const uint8_t *)(data + nblocks * 16); uint32_t k1 = 0; uint32_t k2 = 0; uint32_t k3 = 0; uint32_t k4 = 0; switch (len & 15) { case 15: k4 ^= tail[14] << 16; case 14: k4 ^= tail[13] << 8; case 13: k4 ^= tail[12] << 0; k4 *= c4; k4 = ROTL32(k4, 18); k4 *= c1; h4 ^= k4; case 12: k3 ^= tail[11] << 24; case 11: k3 ^= tail[10] << 16; case 10: k3 ^= tail[9] << 8; case 9: k3 ^= tail[8] << 0; k3 *= c3; k3 = ROTL32(k3, 17); k3 *= c4; h3 ^= k3; case 8: k2 ^= tail[7] << 24; case 7: k2 ^= tail[6] << 16; case 6: k2 ^= tail[5] << 8; case 5: k2 ^= tail[4] << 0; k2 *= c2; k2 = ROTL32(k2, 16); k2 *= c3; h2 ^= k2; case 4: k1 ^= tail[3] << 24; case 3: k1 ^= tail[2] << 16; case 2: k1 ^= tail[1] << 8; case 1: k1 ^= tail[0] << 0; k1 *= c1; k1 = ROTL32(k1, 15); k1 *= c2; h1 ^= k1; }; //---------- // finalization h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; h1 += h2; h1 += h3; h1 += h4; h2 += h1; h3 += h1; h4 += h1; h1 = fmix32(h1); h2 = fmix32(h2); h3 = fmix32(h3); h4 = fmix32(h4); h1 += h2; h1 += h3; h1 += h4; h2 += h1; h3 += h1; h4 += h1; ((uint32_t *)out)[0] = h1; ((uint32_t *)out)[1] = h2; ((uint32_t *)out)[2] = h3; ((uint32_t *)out)[3] = h4; } static uint64_t getblock64(const unsigned char *addr, int offset) { unsigned char data[sizeof(uint64_t)]; memcpy(&data, addr + offset * sizeof(uint64_t), sizeof(uint64_t)); return READ_UINT64(data); } #define BIG_CONSTANT(x) (x##LLU) static uint64_t fmix64(uint64_t k) { k ^= k >> 33; k *= 
BIG_CONSTANT(0xff51afd7ed558ccd); k ^= k >> 33; k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); k ^= k >> 33; return k; } void lmmh_x64_128(const void *addr, unsigned int len, uint32_t seed, uint64_t out[2]) { const uint8_t *data = (const uint8_t *)addr; const int nblocks = len / 16; uint64_t h1 = seed; uint64_t h2 = seed; const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); //---------- // body for (int i = 0; i < nblocks; i++) { uint64_t k1 = getblock64(data, i * 2 + 0); uint64_t k2 = getblock64(data, i * 2 + 1); k1 *= c1; k1 = ROTL64(k1, 31); k1 *= c2; h1 ^= k1; h1 = ROTL64(h1, 27); h1 += h2; h1 = h1 * 5 + 0x52dce729; k2 *= c2; k2 = ROTL64(k2, 33); k2 *= c1; h2 ^= k2; h2 = ROTL64(h2, 31); h2 += h1; h2 = h2 * 5 + 0x38495ab5; } //---------- // tail const uint8_t *tail = (const uint8_t *)(data + nblocks * 16); uint64_t k1 = 0; uint64_t k2 = 0; switch (len & 15) { case 15: k2 ^= ((uint64_t)tail[14]) << 48; case 14: k2 ^= ((uint64_t)tail[13]) << 40; case 13: k2 ^= ((uint64_t)tail[12]) << 32; case 12: k2 ^= ((uint64_t)tail[11]) << 24; case 11: k2 ^= ((uint64_t)tail[10]) << 16; case 10: k2 ^= ((uint64_t)tail[9]) << 8; case 9: k2 ^= ((uint64_t)tail[8]) << 0; k2 *= c2; k2 = ROTL64(k2, 33); k2 *= c1; h2 ^= k2; case 8: k1 ^= ((uint64_t)tail[7]) << 56; case 7: k1 ^= ((uint64_t)tail[6]) << 48; case 6: k1 ^= ((uint64_t)tail[5]) << 40; case 5: k1 ^= ((uint64_t)tail[4]) << 32; case 4: k1 ^= ((uint64_t)tail[3]) << 24; case 3: k1 ^= ((uint64_t)tail[2]) << 16; case 2: k1 ^= ((uint64_t)tail[1]) << 8; case 1: k1 ^= ((uint64_t)tail[0]) << 0; k1 *= c1; k1 = ROTL64(k1, 31); k1 *= c2; h1 ^= k1; }; //---------- // finalization h1 ^= len; h2 ^= len; h1 += h2; h2 += h1; h1 = fmix64(h1); h2 = fmix64(h2); h1 += h2; h2 += h1; ((uint64_t *)out)[0] = h1; ((uint64_t *)out)[1] = h2; } void MurmurHash3_x86_32(const void *addr, int len, uint32_t seed, void *out) { uint32_t buffer; lmmh_x86_32(addr, len, seed, &buffer); memcpy(out, &buffer, 
sizeof(buffer)); } void MurmurHash3_x86_128(const void *addr, int len, uint32_t seed, void *out) { uint32_t buffer[4]; lmmh_x86_128(addr, len, seed, buffer); memcpy(out, buffer, sizeof(buffer)); } void MurmurHash3_x64_128(const void *addr, int len, uint32_t seed, void *out) { uint64_t buffer[2]; lmmh_x64_128(addr, len, seed, buffer); memcpy(out, buffer, sizeof(buffer)); } libmurmurhash-1.6/murmurhash.h000066400000000000000000000026261443143247000166160ustar00rootroot00000000000000//----------------------------------------------------------------------------- // libmurmurhash was written by Fabian Klötzl, and is placed in the public // domain. The author hereby disclaims copyright to this source code. #pragma once #ifdef __cplusplus extern "C" { #endif #include /** @brief Compute a non-cryptographic hash value. * * Consume the seed and a number of bytes starting from the pointer. Chews on * the data and produces a hash value. Do not use for cryptography. * * @param addr - Pointer to data. * @param len - Length of data. * @param seed - Influence the resulting hash value. * @param out - Output parameter. 
*/ extern void lmmh_x86_32(const void *addr, unsigned int len, uint32_t seed, uint32_t out[1]); extern void lmmh_x86_128(const void *addr, unsigned int len, uint32_t seed, uint32_t out[4]); extern void lmmh_x64_128(const void *addr, unsigned int len, uint32_t seed, uint64_t out[2]); // The original function names still work but are deprecated in favor of the // lmmh_* interface extern __attribute__((deprecated)) void MurmurHash3_x86_32(const void *data, int len, uint32_t seed, void *out); extern __attribute__((deprecated)) void MurmurHash3_x86_128(const void *data, int len, uint32_t seed, void *out); extern __attribute__((deprecated)) void MurmurHash3_x64_128(const void *data, int len, uint32_t seed, void *out); #ifdef __cplusplus } #endif libmurmurhash-1.6/test/000077500000000000000000000000001443143247000152235ustar00rootroot00000000000000libmurmurhash-1.6/test/MurmurHash3.c000066400000000000000000000171471443143247000175570ustar00rootroot00000000000000//----------------------------------------------------------------------------- // MurmurHash3 was written by Austin Appleby, and is placed in the public // domain. The author hereby disclaims copyright to this source code. // Note - The x86 and x64 versions do _not_ produce the same results, as the // algorithms are optimized for their respective platforms. You can still // compile and run any of them on any platform, but your performance with the // non-native version will be less than optimal. 
// The project header supplies the prototypes (and, for very old MSVC, the
// fixed-width integer typedefs). It is pulled in whenever it can be found;
// the standard headers below keep this translation unit compilable on its own.
#if defined(__has_include)
#if __has_include("MurmurHash3.h")
#include "MurmurHash3.h"
#endif
#else
#include "MurmurHash3.h"
#endif

#if !defined(_MSC_VER) || (_MSC_VER >= 1600)
#include <stdint.h>
#endif
#include <string.h>

//-----------------------------------------------------------------------------
// Platform-specific functions and macros

// Microsoft Visual Studio

#if defined(_MSC_VER)

#define FORCE_INLINE static __forceinline

#include <stdlib.h>

#define ROTL32(x, y) _rotl(x, y)
#define ROTL64(x, y) _rotl64(x, y)

#define BIG_CONSTANT(x) (x)

// Other compilers

#else // defined(_MSC_VER)

// `static` matters in C: a plain `inline` definition provides no external
// definition, so an un-inlined call (e.g. at -O0) would fail to link.
#define FORCE_INLINE static inline __attribute__((always_inline))

/* Rotate x left by r bits; callers only pass 0 < r < 32. */
static inline uint32_t rotl32(uint32_t x, int8_t r)
{
    return (x << r) | (x >> (32 - r));
}

/* Rotate x left by r bits; callers only pass 0 < r < 64. */
static inline uint64_t rotl64(uint64_t x, int8_t r)
{
    return (x << r) | (x >> (64 - r));
}

#define ROTL32(x, y) rotl32(x, y)
#define ROTL64(x, y) rotl64(x, y)

#define BIG_CONSTANT(x) (x##LLU)

#endif // !defined(_MSC_VER)

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping, do the
// conversion here. The blocks are fetched with memcpy() so that the input may
// have any alignment; the value is still read in native byte order, exactly
// as a direct load would.

/* Return the i-th 32-bit block of the byte sequence starting at p. */
FORCE_INLINE uint32_t getblock32(const uint8_t *p, int i)
{
    uint32_t block;
    memcpy(&block, p + (size_t)i * sizeof block, sizeof block);
    return block;
}

/* Return the i-th 64-bit block of the byte sequence starting at p. */
FORCE_INLINE uint64_t getblock64(const uint8_t *p, int i)
{
    uint64_t block;
    memcpy(&block, p + (size_t)i * sizeof block, sizeof block);
    return block;
}

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

FORCE_INLINE uint32_t fmix32(uint32_t h)
{
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;

    return h;
}

//----------

FORCE_INLINE uint64_t fmix64(uint64_t k)
{
    k ^= k >> 33;
    k *= BIG_CONSTANT(0xff51afd7ed558ccd);
    k ^= k >> 33;
    k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
    k ^= k >> 33;

    return k;
}

//-----------------------------------------------------------------------------

/**
 * 32-bit MurmurHash3.
 *
 * @param key  - Pointer to the data; no alignment requirement.
 * @param len  - Number of bytes to hash.
 * @param seed - Initial hash state.
 * @param out  - Receives 4 bytes (one uint32_t); no alignment requirement.
 */
void MurmurHash3_x86_32(const void *key, int len, uint32_t seed, void *out)
{
    const uint8_t *data = (const uint8_t *)key;
    const int nblocks = len / 4;

    uint32_t h1 = seed;

    const uint32_t c1 = 0xcc9e2d51;
    const uint32_t c2 = 0x1b873593;

    //----------
    // body: whole 4-byte blocks, first to last

    for (int i = 0; i < nblocks; i++) {
        uint32_t k1 = getblock32(data, i);

        k1 *= c1;
        k1 = ROTL32(k1, 15);
        k1 *= c2;

        h1 ^= k1;
        h1 = ROTL32(h1, 13);
        h1 = h1 * 5 + 0xe6546b64;
    }

    //----------
    // tail: the remaining 0..3 bytes

    const uint8_t *tail = data + nblocks * 4;

    uint32_t k1 = 0;

    switch (len & 3) {
    case 3:
        k1 ^= (uint32_t)tail[2] << 16;
        /* fallthrough */
    case 2:
        k1 ^= (uint32_t)tail[1] << 8;
        /* fallthrough */
    case 1:
        k1 ^= tail[0];
        k1 *= c1;
        k1 = ROTL32(k1, 15);
        k1 *= c2;
        h1 ^= k1;
        break;
    default:
        break;
    }

    //----------
    // finalization

    h1 ^= (uint32_t)len;

    h1 = fmix32(h1);

    memcpy(out, &h1, sizeof h1);
}

//-----------------------------------------------------------------------------

/**
 * 128-bit MurmurHash3, variant built from 32-bit operations.
 *
 * @param key  - Pointer to the data; no alignment requirement.
 * @param len  - Number of bytes to hash.
 * @param seed - Initial value of all four state words.
 * @param out  - Receives 16 bytes (four uint32_t); no alignment requirement.
 */
void MurmurHash3_x86_128(const void *key, const int len, uint32_t seed,
                         void *out)
{
    const uint8_t *data = (const uint8_t *)key;
    const int nblocks = len / 16;

    uint32_t h1 = seed;
    uint32_t h2 = seed;
    uint32_t h3 = seed;
    uint32_t h4 = seed;

    const uint32_t c1 = 0x239b961b;
    const uint32_t c2 = 0xab0e9789;
    const uint32_t c3 = 0x38b34ae5;
    const uint32_t c4 = 0xa1e38b93;

    //----------
    // body: whole 16-byte blocks, first to last

    for (int i = 0; i < nblocks; i++) {
        uint32_t k1 = getblock32(data, i * 4 + 0);
        uint32_t k2 = getblock32(data, i * 4 + 1);
        uint32_t k3 = getblock32(data, i * 4 + 2);
        uint32_t k4 = getblock32(data, i * 4 + 3);

        k1 *= c1;
        k1 = ROTL32(k1, 15);
        k1 *= c2;
        h1 ^= k1;

        h1 = ROTL32(h1, 19);
        h1 += h2;
        h1 = h1 * 5 + 0x561ccd1b;

        k2 *= c2;
        k2 = ROTL32(k2, 16);
        k2 *= c3;
        h2 ^= k2;

        h2 = ROTL32(h2, 17);
        h2 += h3;
        h2 = h2 * 5 + 0x0bcaa747;

        k3 *= c3;
        k3 = ROTL32(k3, 17);
        k3 *= c4;
        h3 ^= k3;

        h3 = ROTL32(h3, 15);
        h3 += h4;
        h3 = h3 * 5 + 0x96cd1c35;

        k4 *= c4;
        k4 = ROTL32(k4, 18);
        k4 *= c1;
        h4 ^= k4;

        h4 = ROTL32(h4, 13);
        h4 += h1;
        h4 = h4 * 5 + 0x32ac3b17;
    }

    //----------
    // tail: the remaining 0..15 bytes. Every byte is widened to uint32_t
    // before shifting; shifting the promoted int by 24 would overflow for
    // bytes >= 0x80.

    const uint8_t *tail = data + nblocks * 16;

    uint32_t k1 = 0;
    uint32_t k2 = 0;
    uint32_t k3 = 0;
    uint32_t k4 = 0;

    switch (len & 15) {
    case 15:
        k4 ^= (uint32_t)tail[14] << 16;
        /* fallthrough */
    case 14:
        k4 ^= (uint32_t)tail[13] << 8;
        /* fallthrough */
    case 13:
        k4 ^= (uint32_t)tail[12] << 0;
        k4 *= c4;
        k4 = ROTL32(k4, 18);
        k4 *= c1;
        h4 ^= k4;
        /* fallthrough */
    case 12:
        k3 ^= (uint32_t)tail[11] << 24;
        /* fallthrough */
    case 11:
        k3 ^= (uint32_t)tail[10] << 16;
        /* fallthrough */
    case 10:
        k3 ^= (uint32_t)tail[9] << 8;
        /* fallthrough */
    case 9:
        k3 ^= (uint32_t)tail[8] << 0;
        k3 *= c3;
        k3 = ROTL32(k3, 17);
        k3 *= c4;
        h3 ^= k3;
        /* fallthrough */
    case 8:
        k2 ^= (uint32_t)tail[7] << 24;
        /* fallthrough */
    case 7:
        k2 ^= (uint32_t)tail[6] << 16;
        /* fallthrough */
    case 6:
        k2 ^= (uint32_t)tail[5] << 8;
        /* fallthrough */
    case 5:
        k2 ^= (uint32_t)tail[4] << 0;
        k2 *= c2;
        k2 = ROTL32(k2, 16);
        k2 *= c3;
        h2 ^= k2;
        /* fallthrough */
    case 4:
        k1 ^= (uint32_t)tail[3] << 24;
        /* fallthrough */
    case 3:
        k1 ^= (uint32_t)tail[2] << 16;
        /* fallthrough */
    case 2:
        k1 ^= (uint32_t)tail[1] << 8;
        /* fallthrough */
    case 1:
        k1 ^= (uint32_t)tail[0] << 0;
        k1 *= c1;
        k1 = ROTL32(k1, 15);
        k1 *= c2;
        h1 ^= k1;
        break;
    default:
        break;
    }

    //----------
    // finalization

    h1 ^= (uint32_t)len;
    h2 ^= (uint32_t)len;
    h3 ^= (uint32_t)len;
    h4 ^= (uint32_t)len;

    h1 += h2;
    h1 += h3;
    h1 += h4;
    h2 += h1;
    h3 += h1;
    h4 += h1;

    h1 = fmix32(h1);
    h2 = fmix32(h2);
    h3 = fmix32(h3);
    h4 = fmix32(h4);

    h1 += h2;
    h1 += h3;
    h1 += h4;
    h2 += h1;
    h3 += h1;
    h4 += h1;

    const uint32_t result[4] = {h1, h2, h3, h4};
    memcpy(out, result, sizeof result);
}

//-----------------------------------------------------------------------------

/**
 * 128-bit MurmurHash3, variant built from 64-bit operations.
 *
 * @param key  - Pointer to the data; no alignment requirement.
 * @param len  - Number of bytes to hash.
 * @param seed - Initial value of both state words.
 * @param out  - Receives 16 bytes (two uint64_t); no alignment requirement.
 */
void MurmurHash3_x64_128(const void *key, const int len, const uint32_t seed,
                         void *out)
{
    const uint8_t *data = (const uint8_t *)key;
    const int nblocks = len / 16;

    uint64_t h1 = seed;
    uint64_t h2 = seed;

    const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
    const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

    //----------
    // body: whole 16-byte blocks, first to last

    for (int i = 0; i < nblocks; i++) {
        uint64_t k1 = getblock64(data, i * 2 + 0);
        uint64_t k2 = getblock64(data, i * 2 + 1);

        k1 *= c1;
        k1 = ROTL64(k1, 31);
        k1 *= c2;
        h1 ^= k1;

        h1 = ROTL64(h1, 27);
        h1 += h2;
        h1 = h1 * 5 + 0x52dce729;

        k2 *= c2;
        k2 = ROTL64(k2, 33);
        k2 *= c1;
        h2 ^= k2;

        h2 = ROTL64(h2, 31);
        h2 += h1;
        h2 = h2 * 5 + 0x38495ab5;
    }

    //----------
    // tail: the remaining 0..15 bytes

    const uint8_t *tail = data + nblocks * 16;

    uint64_t k1 = 0;
    uint64_t k2 = 0;

    switch (len & 15) {
    case 15:
        k2 ^= ((uint64_t)tail[14]) << 48;
        /* fallthrough */
    case 14:
        k2 ^= ((uint64_t)tail[13]) << 40;
        /* fallthrough */
    case 13:
        k2 ^= ((uint64_t)tail[12]) << 32;
        /* fallthrough */
    case 12:
        k2 ^= ((uint64_t)tail[11]) << 24;
        /* fallthrough */
    case 11:
        k2 ^= ((uint64_t)tail[10]) << 16;
        /* fallthrough */
    case 10:
        k2 ^= ((uint64_t)tail[9]) << 8;
        /* fallthrough */
    case 9:
        k2 ^= ((uint64_t)tail[8]) << 0;
        k2 *= c2;
        k2 = ROTL64(k2, 33);
        k2 *= c1;
        h2 ^= k2;
        /* fallthrough */
    case 8:
        k1 ^= ((uint64_t)tail[7]) << 56;
        /* fallthrough */
    case 7:
        k1 ^= ((uint64_t)tail[6]) << 48;
        /* fallthrough */
    case 6:
        k1 ^= ((uint64_t)tail[5]) << 40;
        /* fallthrough */
    case 5:
        k1 ^= ((uint64_t)tail[4]) << 32;
        /* fallthrough */
    case 4:
        k1 ^= ((uint64_t)tail[3]) << 24;
        /* fallthrough */
    case 3:
        k1 ^= ((uint64_t)tail[2]) << 16;
        /* fallthrough */
    case 2:
        k1 ^= ((uint64_t)tail[1]) << 8;
        /* fallthrough */
    case 1:
        k1 ^= ((uint64_t)tail[0]) << 0;
        k1 *= c1;
        k1 = ROTL64(k1, 31);
        k1 *= c2;
        h1 ^= k1;
        break;
    default:
        break;
    }

    //----------
    // finalization

    h1 ^= (uint64_t)len;
    h2 ^= (uint64_t)len;

    h1 += h2;
    h2 += h1;

    h1 = fmix64(h1);
    h2 = fmix64(h2);

    h1 += h2;
    h2 += h1;

    const uint64_t result[2] = {h1, h2};
    memcpy(out, result, sizeof result);
}

//-----------------------------------------------------------------------------
/* archive member header, kept verbatim:
   libmurmurhash-1.6/test/MurmurHash3.h000066400000000000000000000021021443143247000175450ustar00rootroot00000000000000 */
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Include guard for the reference implementation's header.
// NOTE(review): identifiers that start with an underscore followed by an
// upper-case letter are reserved for the implementation; consider renaming
// the guard (e.g. MURMURHASH3_H) in a separate change.
#ifndef _MURMURHASH3_H_
#define _MURMURHASH3_H_

//-----------------------------------------------------------------------------
// Platform-specific functions and macros

// Microsoft Visual Studio
// Compilers reporting _MSC_VER below 1600 get hand-written typedefs for the
// three integer types used by the prototypes.

#if defined(_MSC_VER) && (_MSC_VER < 1600)

typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;

// Other compilers

#else // defined(_MSC_VER)

// NOTE(review): the header name after #include is missing in this copy;
// presumably <stdint.h>, which would provide the same three types -- confirm
// against the upstream file.
#include

#endif // !defined(_MSC_VER)

//-----------------------------------------------------------------------------

// All three functions hash `len` bytes starting at `key`, with `seed` as the
// initial state, and store the result through `out`: one uint32_t for
// x86_32, four uint32_t for x86_128 and two uint64_t for x64_128.

void MurmurHash3_x86_32(const void *key, int len, uint32_t seed, void *out);

void MurmurHash3_x86_128(const void *key, int len, uint32_t seed, void *out);

void MurmurHash3_x64_128(const void *key, int len, uint32_t seed, void *out);

//-----------------------------------------------------------------------------

#endif // _MURMURHASH3_H_
libmurmurhash-1.6/test/almostempty000066400000000000000000000000601443143247000175200ustar00rootroot00000000000000This file is intentionally left (almost) empty.
libmurmurhash-1.6/test/almostempty.hash000066400000000000000000000001151443143247000204430ustar00rootroot000000000000007ebb2534 608fc0779aa2929ada51aa1dee3e1e4f a6618a2a2c5e59b8287a2ef325a5a696
libmurmurhash-1.6/test/mmh.c000066400000000000000000000036441443143247000161570ustar00rootroot00000000000000
//-----------------------------------------------------------------------------
// libmurmurhash was written by Fabian Klötzl, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#include #include #include #include #include #include #include #include #include #include #include #include #include "murmurhash.h" __attribute__((noreturn)) void usage(int exit_code); int main(int argc, char *argv[]) { const char *file_name = NULL; int opt = 0; while ((opt = getopt(argc, argv, "h")) != -1) { if (opt == 'h') { usage(EXIT_SUCCESS); } if (opt == '?') { usage(EXIT_FAILURE); } } argc -= optind, argv += optind; if (argc == 0) { usage(EXIT_FAILURE); } file_name = argv[0]; int fd = open(file_name, O_RDONLY); if (fd == -1) { err(errno, "%s: failed to open file", file_name); } struct stat sb; if (fstat(fd, &sb) == -1) { err(errno, "%s: failed to get stats", file_name); } size_t length = sb.st_size; void *data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0); if (data == (void *)-1) { err(errno, "%s: failed to map file", file_name); } // new interface uint32_t hash4; lmmh_x86_32(data, length, 0, &hash4); printf("%" PRIx32 "", hash4); uint32_t hash5[4]; lmmh_x86_128(data, length, 0, hash5); printf(" "); for (int i = 0; i < 4; i++) { printf("%" PRIx32 "", hash5[i]); } uint64_t hash6[2]; lmmh_x64_128(data, length, 0, hash6); printf(" "); for (int i = 0; i < 2; i++) { printf("%" PRIx64 "", hash6[i]); } printf("\n"); munmap(data, length); close(fd); return 0; } void usage(int exit_code) { static const char str[] = { "mmh FILE\n" "Compute the murmurhash of a file.\n\n" " -h Print help\n" // }; fprintf(exit_code == EXIT_SUCCESS ? stdout : stderr, str); exit(exit_code); } libmurmurhash-1.6/test/mmh_old.c000066400000000000000000000036701443143247000170140ustar00rootroot00000000000000//----------------------------------------------------------------------------- // libmurmurhash was written by Fabian Klötzl, and is placed in the public // domain. The author hereby disclaims copyright to this source code. 
#include #include #include #include #include #include #include #include #include #include #include #include #include "murmurhash.h" __attribute__((noreturn)) void usage(int exit_code); int main(int argc, char *argv[]) { const char *file_name = NULL; int opt = 0; while ((opt = getopt(argc, argv, "h")) != -1) { if (opt == 'h') { usage(EXIT_SUCCESS); } if (opt == '?') { usage(EXIT_FAILURE); } } argc -= optind, argv += optind; if (argc == 0) { usage(EXIT_FAILURE); } file_name = argv[0]; int fd = open(file_name, O_RDONLY); if (fd == -1) { err(errno, "%s: failed to open file", file_name); } struct stat sb; if (fstat(fd, &sb) == -1) { err(errno, "%s: failed to get stats", file_name); } size_t length = sb.st_size; void *data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0); if (data == (void *)-1) { err(errno, "%s: failed to map file", file_name); } // old interface uint32_t hash; MurmurHash3_x86_32(data, length, 0, &hash); printf("%" PRIx32 "", hash); uint32_t hash2[4]; MurmurHash3_x86_128(data, length, 0, &hash2); printf(" "); for (int i = 0; i < 4; i++) { printf("%" PRIx32 "", hash2[i]); } uint64_t hash3[2]; MurmurHash3_x64_128(data, length, 0, &hash3); printf(" "); for (int i = 0; i < 2; i++) { printf("%" PRIx64 "", hash3[i]); } printf("\n"); munmap(data, length); close(fd); return 0; } void usage(int exit_code) { static const char str[] = { "mmh FILE\n" "Compute the murmurhash of a file.\n\n" " -h Print help\n" // }; fprintf(exit_code == EXIT_SUCCESS ? stdout : stderr, str); exit(exit_code); }