pax_global_header00006660000000000000000000000064131216317750014520gustar00rootroot0000000000000052 comment=6dfc9ed82b188e9ba9b6d8a8bef21fef46416e11 hat-trie-0.1.2/000077500000000000000000000000001312163177500132355ustar00rootroot00000000000000hat-trie-0.1.2/.gitignore000066400000000000000000000002711312163177500152250ustar00rootroot00000000000000*.la *.lo *.o *~ .DS_Store .deps .libs Makefile Makefile.in aclocal.m4 autom4te.cache config.* configure depcomp hat-trie-*.pc hat-trie-*.tar.gz install-sh libtool ltmain.sh m4 missing hat-trie-0.1.2/.travis.yml000066400000000000000000000001631312163177500153460ustar00rootroot00000000000000language: c compiler: - clang - gcc before_script: autoreconf -i script: ./configure && make && make check hat-trie-0.1.2/COPYING000066400000000000000000000020651312163177500142730ustar00rootroot00000000000000Copyright (C) 2011 by Daniel C. Jones Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN hat-trie-0.1.2/Makefile.am000066400000000000000000000002211312163177500152640ustar00rootroot00000000000000 SUBDIRS = src test EXTRA_DIST = README.md COPYING pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = hat-trie-0.1.pc ACLOCAL_AMFLAGS=-I m4 hat-trie-0.1.2/README.md000066400000000000000000000024721312163177500145210ustar00rootroot00000000000000 Hat-Trie ======== [![Build Status](https://travis-ci.org/dcjones/hat-trie.svg)](https://travis-ci.org/dcjones/hat-trie) This a ANSI C99 implementation of the HAT-trie data structure of Askitis and Sinha, an extremely efficient (space and time) modern variant of tries. The version implemented here maps arrays of bytes to words (i.e., unsigned longs), which can be used to store counts, pointers, etc, or not used at all if you simply want to maintain a set of unique strings. For details see, 1. Askitis, N., & Sinha, R. (2007). HAT-trie: a cache-conscious trie-based data structure for strings. Proceedings of the thirtieth Australasian conference on Computer science-Volume 62 (pp. 97–105). Australian Computer Society, Inc. 2. Askitis, N., & Zobel, J. (2005). Cache-conscious collision resolution in string hash tables. String Processing and Information Retrieval (pp. 91–102). Springer. Installation ------------ git clone git@github.com:dcjones/hat-trie.git cd hat-trie autoreconf -i ./configure make install To use the library, include `hat-trie.h` and link using `-lhat-trie`. Tests ----- Build and run the tests: make check Other Language Bindings ----------------------- * Ruby - https://github.com/luikore/triez * Python - https://github.com/kmike/hat-trie hat-trie-0.1.2/TODO000066400000000000000000000000731312163177500137250ustar00rootroot00000000000000 todo: * Deletion in ahtable. 
* Deletion in hattrie. hat-trie-0.1.2/configure.ac000066400000000000000000000015361312163177500155300ustar00rootroot00000000000000 AC_INIT([hat-trie], [0.1.2], [dcjones@cs.washington.edu]) AM_INIT_AUTOMAKE([foreign]) m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])]) AC_CONFIG_MACRO_DIR([m4]) base_CFLAGS="-std=gnu99 -Wall -Wextra -pedantic" opt_CFLAGS="${base_CFLAGS} -O3" dbg_CFLAGS="${base_CFLAGS} -g -O0" AC_ARG_ENABLE([debugging], [AS_HELP_STRING([--enable-debugging], [enable debugging info (default is no)])], [], [enable_debugging=no]) AS_IF([test "x$enable_debugging" = xyes], [CFLAGS="$dbg_CFLAGS"], [CFLAGS="$opt_CFLAGS"]) AC_PROG_CC AC_PROG_CPP AC_PROG_INSTALL AC_PROG_LN_S AC_PROG_MAKE_SET AC_DISABLE_SHARED AC_PROG_LIBTOOL AC_C_BIGENDIAN([AC_MSG_ERROR([Big-endian systems are not currently supported.])]) AC_HEADER_STDBOOL AC_CONFIG_FILES([hat-trie-0.1.pc Makefile src/Makefile test/Makefile]) AC_OUTPUT hat-trie-0.1.2/hat-trie-0.1.pc.in000066400000000000000000000003351312163177500162000ustar00rootroot00000000000000 prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: @PACKAGE_NAME@ Description: An efficient trie implementation. 
Version: @PACKAGE_VERSION@ Cflags: -I{includedir} Libs: -L${libdir} hat-trie-0.1.2/m4/000077500000000000000000000000001312163177500135555ustar00rootroot00000000000000hat-trie-0.1.2/m4/.gitignore000066400000000000000000000000001312163177500155330ustar00rootroot00000000000000hat-trie-0.1.2/src/000077500000000000000000000000001312163177500140245ustar00rootroot00000000000000hat-trie-0.1.2/src/Makefile.am000066400000000000000000000005601312163177500160610ustar00rootroot00000000000000 lib_LTLIBRARIES = libhat-trie.la libhat_trie_la_SOURCES = common.h \ ahtable.h ahtable.c \ hat-trie.h hat-trie.c \ misc.h misc.c \ murmurhash3.h murmurhash3.c pkginclude_HEADERS = hat-trie.h ahtable.h common.h pstdint.h portable_endian.h hat-trie-0.1.2/src/ahtable.c000066400000000000000000000372021312163177500155740ustar00rootroot00000000000000/* * This file is part of hat-trie. * * Copyright (c) 2011 by Daniel C. Jones * * See ahtable.h for description of the Array Hash Table. * */ #include "ahtable.h" #include "misc.h" #include "murmurhash3.h" #include "portable_endian.h" #include #include const double ahtable_max_load_factor = 100000.0; /* arbitrary large number => don't resize */ const size_t ahtable_initial_size = 4096; static size_t keylen(slot_t s) { if (0x1 & *s) { return (size_t) (*((uint16_t*) s) >> 1); } else { return (size_t) (*s >> 1); } } ahtable_t* ahtable_create() { return ahtable_create_n(ahtable_initial_size); } ahtable_t* ahtable_create_n(size_t n) { ahtable_t* table = malloc_or_die(sizeof(ahtable_t)); table->flag = 0; table->c0 = table->c1 = '\0'; table->n = n; table->m = 0; table->max_m = (size_t) (ahtable_max_load_factor * (double) table->n); table->slots = malloc_or_die(n * sizeof(slot_t)); memset(table->slots, 0, n * sizeof(slot_t)); table->slot_sizes = malloc_or_die(n * sizeof(size_t)); memset(table->slot_sizes, 0, n * sizeof(size_t)); return table; } void ahtable_save(const ahtable_t* table, FILE* fd) { if (table == NULL) return; /* Store table metadata as 64-bit 
network-ordered (big-endian) values so * that architectures with larger capacity can take advantage of size. */ uint64_t n = htobe64(table->n); fwrite(&n, sizeof(uint64_t), 1, fd); uint64_t m = htobe64(table->m); fwrite(&m, sizeof(uint64_t), 1, fd); uint64_t max_m = htobe64(table->max_m); fwrite(&max_m, sizeof(uint64_t), 1, fd); fwrite(&table->flag, sizeof(uint8_t), 1, fd); fwrite(&table->c0, sizeof(unsigned char), 1, fd); fwrite(&table->c1, sizeof(unsigned char), 1, fd); size_t i; uint32_t slot_size; for (i = 0; i < table->n; ++i) { slot_size = htobe32(table->slot_sizes[i]); fwrite(&slot_size, sizeof(uint32_t), 1, fd); if(table->slot_sizes[i] > 0) { fwrite(table->slots[i], sizeof(unsigned char), table->slot_sizes[i], fd); } } } /* Loads a 64-bit value from disk and casts it into a size_t, which may or may * not be 64-bit. As long as the loaded value fits inside size_t, we're good. * Returns 0 if the value didn't fit, 1 otherwise. */ static uint8_t read_u64bit_to_size_t(size_t* dest, FILE* fd) { uint64_t value; fread(&value, sizeof(uint64_t), 1, fd); value = be64toh(value); if (value > (size_t)-1) { printf("Unable to load 64-bit data from file\n"); return 0; } else { *dest = (size_t)value; return 1; } } ahtable_t* ahtable_load(FILE* fd) { size_t n; if (!read_u64bit_to_size_t(&n, fd)) return NULL; ahtable_t* table = ahtable_create_n(n); if (!read_u64bit_to_size_t(&table->m, fd)) return NULL; if (!read_u64bit_to_size_t(&table->max_m, fd)) return NULL; fread(&table->flag, sizeof(uint8_t), 1, fd); fread(&table->c0, sizeof(unsigned char), 1, fd); fread(&table->c1, sizeof(unsigned char), 1, fd); size_t i; uint32_t slot_size; for (i = 0; i < table->n; ++i) { fread(&slot_size, sizeof(uint32_t), 1, fd); table->slot_sizes[i] = be32toh(slot_size); if(table->slot_sizes[i] > 0) { table->slots[i] = malloc_or_die(table->slot_sizes[i]); fread(table->slots[i], sizeof(unsigned char), table->slot_sizes[i], fd); } } return table; } void ahtable_free(ahtable_t* table) { if (table == 
NULL) return; size_t i; for (i = 0; i < table->n; ++i) free(table->slots[i]); free(table->slots); free(table->slot_sizes); free(table); } size_t ahtable_size(const ahtable_t* table) { return table->m; } size_t ahtable_sizeof(const ahtable_t* table) { size_t nbytes = sizeof(ahtable_t) + table->n * (sizeof(size_t) + sizeof(slot_t)); size_t i; for (i = 0; i < table->n; ++i) { nbytes += table->slot_sizes[i]; } return nbytes; } void ahtable_clear(ahtable_t* table) { size_t i; for (i = 0; i < table->n; ++i) free(table->slots[i]); table->n = ahtable_initial_size; table->slots = realloc_or_die(table->slots, table->n * sizeof(slot_t)); memset(table->slots, 0, table->n * sizeof(slot_t)); table->slot_sizes = realloc_or_die(table->slot_sizes, table->n * sizeof(size_t)); memset(table->slot_sizes, 0, table->n * sizeof(size_t)); } /** Inserts a key with value into slot s, and returns a pointer to the * space immediately after. */ static slot_t ins_key(slot_t s, const char* key, size_t len, value_t** val) { // key length if (len < 128) { s[0] = (unsigned char) (len << 1); s += 1; } else { /* The least significant bit is set to indicate that two bytes are * being used to store the key length. */ *((uint16_t*) s) = ((uint16_t) len << 1) | 0x1; s += 2; } // key memcpy(s, key, len * sizeof(unsigned char)); s += len; // value *val = (value_t*) s; **val = 0; s += sizeof(value_t); return s; } static void ahtable_expand(ahtable_t* table) { /* Resizing a table is essentially building a brand new one. * One little shortcut we can take on the memory allocation front is to * figure out how much memory each slot needs in advance. 
*/ assert(table->n > 0); size_t new_n = 2 * table->n; size_t* slot_sizes = malloc_or_die(new_n * sizeof(size_t)); memset(slot_sizes, 0, new_n * sizeof(size_t)); const char* key; size_t len = 0; size_t m = 0; ahtable_iter_t* i = ahtable_iter_begin(table, false); while (!ahtable_iter_finished(i)) { key = ahtable_iter_key(i, &len); slot_sizes[hash(key, len) % new_n] += len + sizeof(value_t) + (len >= 128 ? 2 : 1); ++m; ahtable_iter_next(i); } assert(m == table->m); ahtable_iter_free(i); /* allocate slots */ slot_t* slots = malloc_or_die(new_n * sizeof(slot_t)); size_t j; for (j = 0; j < new_n; ++j) { if (slot_sizes[j] > 0) { slots[j] = malloc_or_die(slot_sizes[j]); } else slots[j] = NULL; } /* rehash values. A few shortcuts can be taken here as well, as we know * there will be no collisions. Instead of the regular insertion routine, * we keep track of the ends of every slot and simply insert keys. * */ slot_t* slots_next = malloc_or_die(new_n * sizeof(slot_t)); memcpy(slots_next, slots, new_n * sizeof(slot_t)); size_t h; m = 0; value_t* u; value_t* v; i = ahtable_iter_begin(table, false); while (!ahtable_iter_finished(i)) { key = ahtable_iter_key(i, &len); h = hash(key, len) % new_n; slots_next[h] = ins_key(slots_next[h], key, len, &u); v = ahtable_iter_val(i); *u = *v; ++m; ahtable_iter_next(i); } assert(m == table->m); ahtable_iter_free(i); free(slots_next); for (j = 0; j < table->n; ++j) free(table->slots[j]); free(table->slots); table->slots = slots; free(table->slot_sizes); table->slot_sizes = slot_sizes; table->n = new_n; table->max_m = (size_t) (ahtable_max_load_factor * (double) table->n); } static value_t* get_key(ahtable_t* table, const char* key, size_t len, bool insert_missing) { /* if we are at capacity, preemptively resize */ if (insert_missing && table->m >= table->max_m) { ahtable_expand(table); } uint32_t i = hash(key, len) % table->n; size_t k; slot_t s; value_t* val; /* search the array for our key */ s = table->slots[i]; while ((size_t) (s - 
table->slots[i]) < table->slot_sizes[i]) { /* get the key length */ k = keylen(s); s += k < 128 ? 1 : 2; /* skip keys that are longer than ours */ if (k != len) { s += k + sizeof(value_t); continue; } /* key found. */ if (memcmp(s, key, len) == 0) { return (value_t*) (s + len); } /* key not found. */ else { s += k + sizeof(value_t); continue; } } if (insert_missing) { /* the key was not found, so we must insert it. */ size_t new_size = table->slot_sizes[i]; new_size += 1 + (len >= 128 ? 1 : 0); // key length new_size += len * sizeof(unsigned char); // key new_size += sizeof(value_t); // value table->slots[i] = realloc_or_die(table->slots[i], new_size); ++table->m; ins_key(table->slots[i] + table->slot_sizes[i], key, len, &val); table->slot_sizes[i] = new_size; return val; } else return NULL; } value_t* ahtable_get(ahtable_t* table, const char* key, size_t len) { if (len > 32767) { fprintf(stderr, "HAT-trie/AH-table cannot store keys longer than 32768\n"); exit(EXIT_FAILURE); } return get_key(table, key, len, true); } value_t* ahtable_tryget(ahtable_t* table, const char* key, size_t len ) { return get_key(table, key, len, false); } int ahtable_del(ahtable_t* table, const char* key, size_t len) { uint32_t i = hash(key, len) % table->n; size_t k; slot_t s; /* search the array for our key */ s = table->slots[i]; while ((size_t) (s - table->slots[i]) < table->slot_sizes[i]) { /* get the key length */ k = keylen(s); s += k < 128 ? 1 : 2; /* skip keys that are longer than ours */ if (k != len) { s += k + sizeof(value_t); continue; } /* key found. */ if (memcmp(s, key, len) == 0) { /* move everything over, resize the array */ unsigned char* t = s + len + sizeof(value_t); s -= k < 128 ? 1 : 2; memmove(s, t, table->slot_sizes[i] - (size_t) (t - table->slots[i])); table->slot_sizes[i] -= (size_t) (t - s); --table->m; return 0; } /* key not found. */ else { s += k + sizeof(value_t); continue; } } // Key was not found. Do nothing. 
return -1; } static int cmpkey(const void* a_, const void* b_) { slot_t a = *(slot_t*) a_; slot_t b = *(slot_t*) b_; size_t ka = keylen(a), kb = keylen(b); a += ka < 128 ? 1 : 2; b += kb < 128 ? 1 : 2; int c = memcmp(a, b, ka < kb ? ka : kb); return c == 0 ? (int) ka - (int) kb : c; } /* Sorted/unsorted iterators are kept private and exposed by passing the sorted flag to ahtable_iter_begin. */ typedef struct ahtable_sorted_iter_t_ { const ahtable_t* table; // parent slot_t* xs; // pointers to keys size_t i; // current key } ahtable_sorted_iter_t; static ahtable_sorted_iter_t* ahtable_sorted_iter_begin(const ahtable_t* table) { ahtable_sorted_iter_t* i = malloc_or_die(sizeof(ahtable_sorted_iter_t)); i->table = table; i->xs = malloc_or_die(table->m * sizeof(slot_t)); i->i = 0; slot_t s; size_t j, k, u; for (j = 0, u = 0; j < table->n; ++j) { s = table->slots[j]; while (s < table->slots[j] + table->slot_sizes[j]) { i->xs[u++] = s; k = keylen(s); s += k < 128 ? 1 : 2; s += k + sizeof(value_t); } } qsort(i->xs, table->m, sizeof(slot_t), cmpkey); return i; } static bool ahtable_sorted_iter_finished(ahtable_sorted_iter_t* i) { return i->i >= i->table->m; } static void ahtable_sorted_iter_next(ahtable_sorted_iter_t* i) { if (ahtable_sorted_iter_finished(i)) return; ++i->i; } static void ahtable_sorted_iter_free(ahtable_sorted_iter_t* i) { if (i == NULL) return; free(i->xs); free(i); } static const char* ahtable_sorted_iter_key(ahtable_sorted_iter_t* i, size_t* len) { if (ahtable_sorted_iter_finished(i)) return NULL; slot_t s = i->xs[i->i]; if (len) *len = keylen(s); return (const char*) (s + (*len < 128 ? 1 : 2)); } static value_t* ahtable_sorted_iter_val(ahtable_sorted_iter_t* i) { if (ahtable_sorted_iter_finished(i)) return NULL; slot_t s = i->xs[i->i]; size_t k = keylen(s); s += k < 128 ? 
1 : 2; s += k; return (value_t*) s; } typedef struct ahtable_unsorted_iter_t_ { const ahtable_t* table; // parent size_t i; // slot index slot_t s; // slot position } ahtable_unsorted_iter_t; static ahtable_unsorted_iter_t* ahtable_unsorted_iter_begin(const ahtable_t* table) { ahtable_unsorted_iter_t* i = malloc_or_die(sizeof(ahtable_unsorted_iter_t)); i->table = table; for (i->i = 0; i->i < i->table->n; ++i->i) { i->s = table->slots[i->i]; if ((size_t) (i->s - table->slots[i->i]) >= table->slot_sizes[i->i]) continue; break; } return i; } static bool ahtable_unsorted_iter_finished(ahtable_unsorted_iter_t* i) { return i->i >= i->table->n; } static void ahtable_unsorted_iter_next(ahtable_unsorted_iter_t* i) { if (ahtable_unsorted_iter_finished(i)) return; /* get the key length */ size_t k = keylen(i->s); i->s += k < 128 ? 1 : 2; /* skip to the next key */ i->s += k + sizeof(value_t); if ((size_t) (i->s - i->table->slots[i->i]) >= i->table->slot_sizes[i->i]) { do { ++i->i; } while(i->i < i->table->n && i->table->slot_sizes[i->i] == 0); if (i->i < i->table->n) i->s = i->table->slots[i->i]; else i->s = NULL; } } static void ahtable_unsorted_iter_free(ahtable_unsorted_iter_t* i) { free(i); } static const char* ahtable_unsorted_iter_key(ahtable_unsorted_iter_t* i, size_t* len) { if (ahtable_unsorted_iter_finished(i)) return NULL; slot_t s = i->s; size_t k; if (0x1 & *s) { k = (size_t) (*((uint16_t*) s)) >> 1; s += 2; } else { k = (size_t) (*s >> 1); s += 1; } if(len) *len = k; return (const char*) s; } static value_t* ahtable_unsorted_iter_val(ahtable_unsorted_iter_t* i) { if (ahtable_unsorted_iter_finished(i)) return NULL; slot_t s = i->s; size_t k; if (0x1 & *s) { k = (size_t) (*((uint16_t*) s)) >> 1; s += 2; } else { k = (size_t) (*s >> 1); s += 1; } s += k; return (value_t*) s; } struct ahtable_iter_t_ { bool sorted; union { ahtable_unsorted_iter_t* unsorted; ahtable_sorted_iter_t* sorted; } i; }; ahtable_iter_t* ahtable_iter_begin(const ahtable_t* table, bool sorted) 
{ ahtable_iter_t* i = malloc_or_die(sizeof(ahtable_iter_t)); i->sorted = sorted; if (sorted) i->i.sorted = ahtable_sorted_iter_begin(table); else i->i.unsorted = ahtable_unsorted_iter_begin(table); return i; } void ahtable_iter_next(ahtable_iter_t* i) { if (i->sorted) ahtable_sorted_iter_next(i->i.sorted); else ahtable_unsorted_iter_next(i->i.unsorted); } bool ahtable_iter_finished(ahtable_iter_t* i) { if (i->sorted) return ahtable_sorted_iter_finished(i->i.sorted); else return ahtable_unsorted_iter_finished(i->i.unsorted); } void ahtable_iter_free(ahtable_iter_t* i) { if (i == NULL) return; if (i->sorted) ahtable_sorted_iter_free(i->i.sorted); else ahtable_unsorted_iter_free(i->i.unsorted); free(i); } const char* ahtable_iter_key(ahtable_iter_t* i, size_t* len) { if (i->sorted) return ahtable_sorted_iter_key(i->i.sorted, len); else return ahtable_unsorted_iter_key(i->i.unsorted, len); } value_t* ahtable_iter_val(ahtable_iter_t* i) { if (i->sorted) return ahtable_sorted_iter_val(i->i.sorted); else return ahtable_unsorted_iter_val(i->i.unsorted); } hat-trie-0.1.2/src/ahtable.h000066400000000000000000000100611312163177500155730ustar00rootroot00000000000000/* * This file is part of hat-trie. * * Copyright (c) 2011 by Daniel C. Jones * * * This is an implementation of the 'cache-conscious' hash tables described in, * * Askitis, N., & Zobel, J. (2005). Cache-conscious collision resolution in * string hash tables. String Processing and Information Retrieval (pp. * 91–102). Springer. * * http://naskitis.com/naskitis-spire05.pdf * * Briefly, the idea behind an Array Hash Table is, as opposed to separate * chaining with linked lists, to store keys contiguously in one big array, * thereby improving the caching behavior, and reducing space requirements. * * ahtable keeps a fixed number (array) of slots, each of which contains a * variable number of key/value pairs. Each key is preceded by its length-- * one byte for lengths < 128 bytes, and TWO bytes for longer keys. 
The least * significant bit of the first byte indicates, if set, that the size is two * bytes. The slot number where a key/value pair goes is determined by finding * the murmurhashed integer value of its key, modulus the number of slots. * The number of slots expands in a stepwise fashion when the number of # key/value pairs reaches an arbitrarily large number. * * +-------+-------+-------+-------+-------+-------+ * | 0 | 1 | 2 | 3 | ... | N | * +-------+-------+-------+-------+-------+-------+ * | | | | | * v | | v v * NULL | | 4html[VALUE] etc. * | v * | 5space[VALUE]4jury[VALUE] * v * 6justice[VALUE]3car[VALUE]4star[VALUE] * */ #ifndef HATTRIE_AHTABLE_H #define HATTRIE_AHTABLE_H #ifdef __cplusplus extern "C" { #endif #include #include #include #include "pstdint.h" #include "common.h" typedef unsigned char* slot_t; typedef struct ahtable_t_ { /* these fields are reserved for hattrie to fiddle with */ uint8_t flag; unsigned char c0; unsigned char c1; size_t n; // number of slots size_t m; // number of key/value pairs stored size_t max_m; // number of stored keys before we resize size_t* slot_sizes; slot_t* slots; } ahtable_t; extern const double ahtable_max_load_factor; extern const size_t ahtable_initial_size; ahtable_t* ahtable_create (void); // Create an empty hash table. ahtable_t* ahtable_create_n (size_t n); // Create an empty hash table, with // n slots reserved. ahtable_t* ahtable_load (FILE* fd); // Load a hash table from a file handle. void ahtable_save (const ahtable_t* T, FILE* fd); // Save a hash table to a file handle. void ahtable_free (ahtable_t*); // Free all memory used by a table. void ahtable_clear (ahtable_t*); // Remove all entries. size_t ahtable_size (const ahtable_t*); // Number of stored keys. size_t ahtable_sizeof (const ahtable_t*); // Memory used by the table in bytes. /** Find the given key in the table, inserting it if it does not exist, and * returning a pointer to it's value. 
* * This pointer is not guaranteed to be valid after additional calls to * ahtable_get, ahtable_del, ahtable_clear, or other functions that modify the * table. */ value_t* ahtable_get (ahtable_t*, const char* key, size_t len); /* Find a given key in the table, return a NULL pointer if it does not exist. */ value_t* ahtable_tryget (ahtable_t*, const char* key, size_t len); int ahtable_del(ahtable_t*, const char* key, size_t len); typedef struct ahtable_iter_t_ ahtable_iter_t; ahtable_iter_t* ahtable_iter_begin (const ahtable_t*, bool sorted); void ahtable_iter_next (ahtable_iter_t*); bool ahtable_iter_finished (ahtable_iter_t*); void ahtable_iter_free (ahtable_iter_t*); const char* ahtable_iter_key (ahtable_iter_t*, size_t* len); value_t* ahtable_iter_val (ahtable_iter_t*); #ifdef __cplusplus } #endif #endif hat-trie-0.1.2/src/common.h000066400000000000000000000005121312163177500154630ustar00rootroot00000000000000/* * This file is part of hat-trie. * * Copyright (c) 2011 by Daniel C. Jones * * * Common typedefs, etc. * */ #ifndef HATTRIE_COMMON_H #define HATTRIE_COMMON_H #include "pstdint.h" // an unsigned int that is guaranteed to be the same size as a pointer typedef uintptr_t value_t; #endif hat-trie-0.1.2/src/hat-trie.c000066400000000000000000000421211312163177500157050ustar00rootroot00000000000000/* * This file is part of hat-trie. * * Copyright (c) 2011 by Daniel C. Jones * */ #include "hat-trie.h" #include "ahtable.h" #include "misc.h" #include "pstdint.h" #include #include #define HT_UNUSED(x) x=x /* maximum number of keys that may be stored in a bucket before it is burst */ static const size_t MAX_BUCKET_SIZE = 16384; #define NODE_MAXCHAR 0xff // 0x7f for 7-bit ASCII #define NODE_CHILDS (NODE_MAXCHAR+1) static const uint8_t NODE_TYPE_TRIE = 0x1; static const uint8_t NODE_TYPE_PURE_BUCKET = 0x2; static const uint8_t NODE_TYPE_HYBRID_BUCKET = 0x4; static const uint8_t NODE_HAS_VAL = 0x8; struct trie_node_t_; /* Node's may be trie nodes or buckets. 
This union allows us to keep * non-specific pointer. */ typedef union node_ptr_ { ahtable_t* b; struct trie_node_t_* t; uint8_t* flag; } node_ptr; typedef struct trie_node_t_ { uint8_t flag; /* the value for the key that is consumed on a trie node */ value_t val; /* Map a character to either a trie_node_t or a ahtable_t. The first byte * must be examined to determine which. */ node_ptr xs[NODE_CHILDS]; } trie_node_t; struct hattrie_t_ { node_ptr root; // root node size_t m; // number of stored keys }; size_t hattrie_size(const hattrie_t* T) { return T->m; } static size_t node_sizeof(node_ptr node) { if (*node.flag & NODE_TYPE_TRIE) { size_t nbytes = sizeof(trie_node_t); size_t i; nbytes += node_sizeof(node.t->xs[0]); for (i = 1; i < NODE_CHILDS; ++i) { if (node.t->xs[i].t != node.t->xs[i-1].t) nbytes += node_sizeof(node.t->xs[i]); } return nbytes; } else { return ahtable_sizeof(node.b); } } size_t hattrie_sizeof(const hattrie_t* T) { return sizeof(hattrie_t) + node_sizeof(T->root); } /* Create a new trie node with all pointers pointing to the given child (which * can be NULL). */ static trie_node_t* alloc_trie_node(hattrie_t* T, node_ptr child) { trie_node_t* node = malloc_or_die(sizeof(trie_node_t)); node->flag = NODE_TYPE_TRIE; node->val = 0; /* pass T to allow custom allocator for trie. 
*/ HT_UNUSED(T); /* unused now */ size_t i; for (i = 0; i < NODE_CHILDS; ++i) node->xs[i] = child; return node; } /* iterate trie nodes until string is consumed or bucket is found */ static node_ptr hattrie_consume(node_ptr *p, const char **k, size_t *l, unsigned brk) { node_ptr node = p->t->xs[(unsigned char) **k]; while (*node.flag & NODE_TYPE_TRIE && *l > brk) { ++*k; --*l; *p = node; node = node.t->xs[(unsigned char) **k]; } /* copy and writeback variables if it's faster */ assert(*p->flag & NODE_TYPE_TRIE); return node; } /* use node value and return pointer to it */ static inline value_t* hattrie_useval(hattrie_t *T, node_ptr n) { if (!(n.t->flag & NODE_HAS_VAL)) { n.t->flag |= NODE_HAS_VAL; ++T->m; } return &n.t->val; } /* clear node value if exists */ static inline int hattrie_clrval(hattrie_t *T, node_ptr n) { if (n.t->flag & NODE_HAS_VAL) { n.t->flag &= ~NODE_HAS_VAL; n.t->val = 0; --T->m; return 0; } return -1; } /* find node in trie */ static node_ptr hattrie_find(hattrie_t* T, const char **key, size_t *len) { node_ptr parent = T->root; assert(*parent.flag & NODE_TYPE_TRIE); if (*len == 0) return parent; node_ptr node = hattrie_consume(&parent, key, len, 1); /* if the trie node consumes value, use it */ if (*node.flag & NODE_TYPE_TRIE) { if (!(node.t->flag & NODE_HAS_VAL)) { node.flag = NULL; } return node; } /* pure bucket holds only key suffixes, skip current char */ if (*node.flag & NODE_TYPE_PURE_BUCKET) { *key += 1; *len -= 1; } /* do not scan bucket, it's not needed for this operation */ return node; } hattrie_t* hattrie_create() { hattrie_t* T = malloc_or_die(sizeof(hattrie_t)); T->m = 0; node_ptr node; node.b = ahtable_create(); node.b->flag = NODE_TYPE_HYBRID_BUCKET; node.b->c0 = 0x00; node.b->c1 = NODE_MAXCHAR; T->root.t = alloc_trie_node(T, node); return T; } static void hattrie_free_node(node_ptr node) { if (*node.flag & NODE_TYPE_TRIE) { size_t i; for (i = 0; i < NODE_CHILDS; ++i) { if (i > 0 && node.t->xs[i].t == node.t->xs[i - 1].t) 
continue; /* XXX: recursion might not be the best choice here. It is possible * to build a very deep trie. */ if (node.t->xs[i].t) hattrie_free_node(node.t->xs[i]); } free(node.t); } else { ahtable_free(node.b); } } void hattrie_free(hattrie_t* T) { hattrie_free_node(T->root); free(T); } void hattrie_clear(hattrie_t* T) { hattrie_free_node(T->root); node_ptr node; node.b = ahtable_create(); node.b->flag = NODE_TYPE_HYBRID_BUCKET; node.b->c0 = 0x00; node.b->c1 = 0xff; T->root.t = alloc_trie_node(T, node); } /* Perform one split operation on the given node with the given parent. */ static void hattrie_split(hattrie_t* T, node_ptr parent, node_ptr node) { /* only buckets may be split */ assert(*node.flag & NODE_TYPE_PURE_BUCKET || *node.flag & NODE_TYPE_HYBRID_BUCKET); assert(*parent.flag & NODE_TYPE_TRIE); if (*node.flag & NODE_TYPE_PURE_BUCKET) { /* turn the pure bucket into a hybrid bucket */ parent.t->xs[node.b->c0].t = alloc_trie_node(T, node); /* if the bucket had an empty key, move it to the new trie node */ value_t* val = ahtable_tryget(node.b, NULL, 0); if (val) { parent.t->xs[node.b->c0].t->val = *val; parent.t->xs[node.b->c0].t->flag |= NODE_HAS_VAL; *val = 0; ahtable_del(node.b, NULL, 0); } node.b->c0 = 0x00; node.b->c1 = NODE_MAXCHAR; node.b->flag = NODE_TYPE_HYBRID_BUCKET; return; } /* This is a hybrid bucket. Perform a proper split. 
*/ /* count the number of occourances of every leading character */ unsigned int cs[NODE_CHILDS]; // occurance count for leading chars memset(cs, 0, NODE_CHILDS * sizeof(unsigned int)); size_t len; const char* key; ahtable_iter_t* i = ahtable_iter_begin(node.b, false); while (!ahtable_iter_finished(i)) { key = ahtable_iter_key(i, &len); assert(len > 0); cs[(unsigned char) key[0]] += 1; ahtable_iter_next(i); } ahtable_iter_free(i); /* choose a split point */ unsigned int left_m, right_m, all_m; unsigned char j = node.b->c0; all_m = ahtable_size(node.b); left_m = cs[j]; right_m = all_m - left_m; int d; while (j + 1 < node.b->c1) { d = abs((int) (left_m + cs[j + 1]) - (int) (right_m - cs[j + 1])); if (d <= abs(left_m - right_m) && left_m + cs[j + 1] < all_m) { j += 1; left_m += cs[j]; right_m -= cs[j]; } else break; } /* now split into two node cooresponding to ranges [0, j] and * [j + 1, NODE_MAXCHAR], respectively. */ /* create new left and right nodes */ /* TODO: Add a special case if either node is a hybrid bucket containing all * the keys. In such a case, do not build a new table, just use the old one. * */ size_t num_slots; for (num_slots = ahtable_initial_size; (double) left_m > ahtable_max_load_factor * (double) num_slots; num_slots *= 2); node_ptr left, right; left.b = ahtable_create_n(num_slots); left.b->c0 = node.b->c0; left.b->c1 = j; left.b->flag = left.b->c0 == left.b->c1 ? NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET; for (num_slots = ahtable_initial_size; (double) right_m > ahtable_max_load_factor * (double) num_slots; num_slots *= 2); right.b = ahtable_create_n(num_slots); right.b->c0 = j + 1; right.b->c1 = node.b->c1; right.b->flag = right.b->c0 == right.b->c1 ? 
NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET; /* update the parent's pointer */ unsigned int c; for (c = node.b->c0; c <= j; ++c) parent.t->xs[c] = left; for (; c <= node.b->c1; ++c) parent.t->xs[c] = right; /* distribute keys to the new left or right node */ value_t* u; value_t* v; i = ahtable_iter_begin(node.b, false); while (!ahtable_iter_finished(i)) { key = ahtable_iter_key(i, &len); u = ahtable_iter_val(i); assert(len > 0); /* left */ if ((unsigned char) key[0] <= j) { if (*left.flag & NODE_TYPE_PURE_BUCKET) { v = ahtable_get(left.b, key + 1, len - 1); } else { v = ahtable_get(left.b, key, len); } *v = *u; } /* right */ else { if (*right.flag & NODE_TYPE_PURE_BUCKET) { v = ahtable_get(right.b, key + 1, len - 1); } else { v = ahtable_get(right.b, key, len); } *v = *u; } ahtable_iter_next(i); } ahtable_iter_free(i); ahtable_free(node.b); } value_t* hattrie_get(hattrie_t* T, const char* key, size_t len) { node_ptr parent = T->root; assert(*parent.flag & NODE_TYPE_TRIE); if (len == 0) return &parent.t->val; /* consume all trie nodes, now parent must be trie and child anything */ node_ptr node = hattrie_consume(&parent, &key, &len, 0); assert(*parent.flag & NODE_TYPE_TRIE); /* if the key has been consumed on a trie node, use its value */ if (len == 0) { if (*node.flag & NODE_TYPE_TRIE) { return hattrie_useval(T, node); } else if (*node.flag & NODE_TYPE_HYBRID_BUCKET) { return hattrie_useval(T, parent); } } /* preemptively split the bucket if it is full */ while (ahtable_size(node.b) >= MAX_BUCKET_SIZE) { hattrie_split(T, parent, node); /* after the split, the node pointer is invalidated, so we search from * the parent again. 
*/ node = hattrie_consume(&parent, &key, &len, 0); /* if the key has been consumed on a trie node, use its value */ if (len == 0) { if (*node.flag & NODE_TYPE_TRIE) { return hattrie_useval(T, node); } else if (*node.flag & NODE_TYPE_HYBRID_BUCKET) { return hattrie_useval(T, parent); } } } assert(*node.flag & NODE_TYPE_PURE_BUCKET || *node.flag & NODE_TYPE_HYBRID_BUCKET); assert(len > 0); size_t m_old = node.b->m; value_t* val; if (*node.flag & NODE_TYPE_PURE_BUCKET) { val = ahtable_get(node.b, key + 1, len - 1); } else { val = ahtable_get(node.b, key, len); } T->m += (node.b->m - m_old); return val; } value_t* hattrie_tryget(hattrie_t* T, const char* key, size_t len) { /* find node for given key */ node_ptr node = hattrie_find(T, &key, &len); if (node.flag == NULL) { return NULL; } /* if the trie node consumes value, use it */ if (*node.flag & NODE_TYPE_TRIE) { return &node.t->val; } return ahtable_tryget(node.b, key, len); } int hattrie_del(hattrie_t* T, const char* key, size_t len) { node_ptr parent = T->root; HT_UNUSED(parent); assert(*parent.flag & NODE_TYPE_TRIE); /* find node for deletion */ node_ptr node = hattrie_find(T, &key, &len); if (node.flag == NULL) { return -1; } /* if consumed on a trie node, clear the value */ if (*node.flag & NODE_TYPE_TRIE) { return hattrie_clrval(T, node); } /* remove from bucket */ size_t m_old = ahtable_size(node.b); int ret = ahtable_del(node.b, key, len); T->m -= (m_old - ahtable_size(node.b)); /* merge empty buckets */ /*! \todo */ return ret; } /* plan for iteration: * This is tricky, as we have no parent pointers currently, and I would like to * avoid adding them. 
That means maintaining a stack * */ typedef struct hattrie_node_stack_t_ { unsigned char c; size_t level; node_ptr node; struct hattrie_node_stack_t_* next; } hattrie_node_stack_t; struct hattrie_iter_t_ { char* key; size_t keysize; // space reserved for the key size_t level; /* keep track of keys stored in trie nodes */ bool has_nil_key; value_t nil_val; const hattrie_t* T; bool sorted; ahtable_iter_t* i; hattrie_node_stack_t* stack; }; static void hattrie_iter_pushchar(hattrie_iter_t* i, size_t level, char c) { if (i->keysize < level) { i->keysize *= 2; i->key = realloc_or_die(i->key, i->keysize * sizeof(char)); } if (level > 0) { i->key[level - 1] = c; } i->level = level; } static void hattrie_iter_nextnode(hattrie_iter_t* i) { if (i->stack == NULL) return; /* pop the stack */ node_ptr node; hattrie_node_stack_t* next; unsigned char c; size_t level; node = i->stack->node; next = i->stack->next; c = i->stack->c; level = i->stack->level; free(i->stack); i->stack = next; if (*node.flag & NODE_TYPE_TRIE) { hattrie_iter_pushchar(i, level, c); if(node.t->flag & NODE_HAS_VAL) { i->has_nil_key = true; i->nil_val = node.t->val; } /* push all child nodes from right to left */ int j; for (j = NODE_MAXCHAR; j >= 0; --j) { /* skip repeated pointers to hybrid bucket */ if (j < NODE_MAXCHAR && node.t->xs[j].t == node.t->xs[j + 1].t) continue; // push stack next = i->stack; i->stack = malloc_or_die(sizeof(hattrie_node_stack_t)); i->stack->node = node.t->xs[j]; i->stack->next = next; i->stack->level = level + 1; i->stack->c = (unsigned char) j; } } else { if (*node.flag & NODE_TYPE_PURE_BUCKET) { hattrie_iter_pushchar(i, level, c); } else { i->level = level - 1; } i->i = ahtable_iter_begin(node.b, i->sorted); } } hattrie_iter_t* hattrie_iter_begin(const hattrie_t* T, bool sorted) { hattrie_iter_t* i = malloc_or_die(sizeof(hattrie_iter_t)); i->T = T; i->sorted = sorted; i->i = NULL; i->keysize = 16; i->key = malloc_or_die(i->keysize * sizeof(char)); i->level = 0; i->has_nil_key = 
false; i->nil_val = 0; i->stack = malloc_or_die(sizeof(hattrie_node_stack_t)); i->stack->next = NULL; i->stack->node = T->root; i->stack->c = '\0'; i->stack->level = 0; while (((i->i == NULL || ahtable_iter_finished(i->i)) && !i->has_nil_key) && i->stack != NULL ) { ahtable_iter_free(i->i); i->i = NULL; hattrie_iter_nextnode(i); } if (i->i != NULL && ahtable_iter_finished(i->i)) { ahtable_iter_free(i->i); i->i = NULL; } return i; } void hattrie_iter_next(hattrie_iter_t* i) { if (hattrie_iter_finished(i)) return; if (i->i != NULL && !ahtable_iter_finished(i->i)) { ahtable_iter_next(i->i); } else if (i->has_nil_key) { i->has_nil_key = false; i->nil_val = 0; hattrie_iter_nextnode(i); } while (((i->i == NULL || ahtable_iter_finished(i->i)) && !i->has_nil_key) && i->stack != NULL ) { ahtable_iter_free(i->i); i->i = NULL; hattrie_iter_nextnode(i); } if (i->i != NULL && ahtable_iter_finished(i->i)) { ahtable_iter_free(i->i); i->i = NULL; } } bool hattrie_iter_finished(hattrie_iter_t* i) { return i->stack == NULL && i->i == NULL && !i->has_nil_key; } void hattrie_iter_free(hattrie_iter_t* i) { if (i == NULL) return; if (i->i) ahtable_iter_free(i->i); hattrie_node_stack_t* next; while (i->stack) { next = i->stack->next; free(i->stack); i->stack = next; } free(i->key); free(i); } const char* hattrie_iter_key(hattrie_iter_t* i, size_t* len) { if (hattrie_iter_finished(i)) return NULL; size_t sublen; const char* subkey; if (i->has_nil_key) { subkey = NULL; sublen = 0; } else subkey = ahtable_iter_key(i->i, &sublen); if (i->keysize < i->level + sublen + 1) { while (i->keysize < i->level + sublen + 1) i->keysize *= 2; i->key = realloc_or_die(i->key, i->keysize * sizeof(char)); } memcpy(i->key + i->level, subkey, sublen); i->key[i->level + sublen] = '\0'; if (len) *len = i->level + sublen; return i->key; } value_t* hattrie_iter_val(hattrie_iter_t* i) { if (i->has_nil_key) return &i->nil_val; if (hattrie_iter_finished(i)) return NULL; return ahtable_iter_val(i->i); } bool 
hattrie_iter_equal(const hattrie_iter_t* a, const hattrie_iter_t* b) { return a->T == b->T && a->sorted == b->sorted && a->i == b->i; } hat-trie-0.1.2/src/hat-trie.h000066400000000000000000000046311312163177500157160ustar00rootroot00000000000000/* * This file is part of hat-trie * * Copyright (c) 2011 by Daniel C. Jones * * * This is an implementation of the HAT-trie data structure described in, * * Askitis, N., & Sinha, R. (2007). HAT-trie: a cache-conscious trie-based data * structure for strings. Proceedings of the thirtieth Australasian conference on * Computer science-Volume 62 (pp. 97–105). Australian Computer Society, Inc. * * The HAT-trie is in essence a hybrid data structure, combining tries and hash * tables in a clever way to try to get the best of both worlds. * */ #ifndef HATTRIE_HATTRIE_H #define HATTRIE_HATTRIE_H #ifdef __cplusplus extern "C" { #endif #include "common.h" #include #include typedef struct hattrie_t_ hattrie_t; hattrie_t* hattrie_create (void); // Create an empty hat-trie. void hattrie_free (hattrie_t*); // Free all memory used by a trie. void hattrie_clear (hattrie_t*); // Remove all entries. size_t hattrie_size (const hattrie_t*); // Number of stored keys. size_t hattrie_sizeof (const hattrie_t*); // Memory used in structure in bytes. /** Find the given key in the trie, inserting it if it does not exist, and * returning a pointer to it's key. * * This pointer is not guaranteed to be valid after additional calls to * hattrie_get, hattrie_del, hattrie_clear, or other functions that modifies the * trie. */ value_t* hattrie_get (hattrie_t*, const char* key, size_t len); /** Find a given key in the table, returning a NULL pointer if it does not * exist. */ value_t* hattrie_tryget (hattrie_t*, const char* key, size_t len); /** Delete a given key from trie. Returns 0 if successful or -1 if not found. 
*/ int hattrie_del(hattrie_t* T, const char* key, size_t len); typedef struct hattrie_iter_t_ hattrie_iter_t; hattrie_iter_t* hattrie_iter_begin (const hattrie_t*, bool sorted); void hattrie_iter_next (hattrie_iter_t*); bool hattrie_iter_finished (hattrie_iter_t*); void hattrie_iter_free (hattrie_iter_t*); const char* hattrie_iter_key (hattrie_iter_t*, size_t* len); value_t* hattrie_iter_val (hattrie_iter_t*); /* Return true if two iterators are equal. */ bool hattrie_iter_equal (const hattrie_iter_t* a, const hattrie_iter_t* b); #ifdef __cplusplus } #endif #endif hat-trie-0.1.2/src/misc.c000066400000000000000000000014601312163177500151240ustar00rootroot00000000000000/* * This file is part of hat-trie. * * Copyright (c) 2011 by Daniel C. Jones * */ #include "misc.h" #include void* malloc_or_die(size_t n) { void* p = malloc(n); if (p == NULL && n != 0) { fprintf(stderr, "Cannot allocate %zu bytes.\n", n); exit(EXIT_FAILURE); } return p; } void* realloc_or_die(void* ptr, size_t n) { void* p = realloc(ptr, n); if (p == NULL && n != 0) { fprintf(stderr, "Cannot allocate %zu bytes.\n", n); exit(EXIT_FAILURE); } return p; } FILE* fopen_or_die(const char* path, const char* mode) { FILE* f = fopen(path, mode); if (f == NULL) { fprintf(stderr, "Cannot open file %s with mode %s.\n", path, mode); exit(EXIT_FAILURE); } return f; } hat-trie-0.1.2/src/misc.h000066400000000000000000000005321312163177500151300ustar00rootroot00000000000000/* * This file is part of hat-trie. * * Copyright (c) 2011 by Daniel C. Jones * * misc : * miscelaneous functions. * */ #ifndef LINESET_MISC_H #define LINESET_MISC_H #include void* malloc_or_die(size_t); void* realloc_or_die(void*, size_t); FILE* fopen_or_die(const char*, const char*); #endif hat-trie-0.1.2/src/murmurhash3.c000066400000000000000000000024761312163177500164570ustar00rootroot00000000000000/* This is MurmurHash3. The original C++ code was placed in the public domain * by its author, Austin Appleby. 
*/ #include "murmurhash3.h" static inline uint32_t fmix(uint32_t h) { h ^= h >> 16; h *= 0x85ebca6b; h ^= h >> 13; h *= 0xc2b2ae35; h ^= h >> 16; return h; } static inline uint32_t rotl32(uint32_t x, int8_t r) { return (x << r) | (x >> (32 - r)); } uint32_t hash(const char* data, size_t len_) { const int len = (int) len_; const int nblocks = len / 4; uint32_t h1 = 0xc062fb4a; uint32_t c1 = 0xcc9e2d51; uint32_t c2 = 0x1b873593; //---------- // body const uint32_t * blocks = (const uint32_t*) (data + nblocks * 4); int i; for(i = -nblocks; i; i++) { uint32_t k1 = blocks[i]; k1 *= c1; k1 = rotl32(k1, 15); k1 *= c2; h1 ^= k1; h1 = rotl32(h1, 13); h1 = h1*5+0xe6546b64; } //---------- // tail const uint8_t * tail = (const uint8_t*)(data + nblocks*4); uint32_t k1 = 0; switch(len & 3) { case 3: k1 ^= tail[2] << 16; case 2: k1 ^= tail[1] << 8; case 1: k1 ^= tail[0]; k1 *= c1; k1 = rotl32(k1,15); k1 *= c2; h1 ^= k1; } //---------- // finalization h1 ^= len; h1 = fmix(h1); return h1; } hat-trie-0.1.2/src/murmurhash3.h000066400000000000000000000002171312163177500164530ustar00rootroot00000000000000 #ifndef MURMURHASH3_H #define MURMURHASH3_H #include #include "pstdint.h" uint32_t hash(const char* data, size_t len); #endif hat-trie-0.1.2/src/portable_endian.h000066400000000000000000000052361312163177500173310ustar00rootroot00000000000000// "License": Public Domain // I, Mathias Panzenböck, place this file hereby into the public domain. Use it at your own risk for whatever you like. 
#ifndef PORTABLE_ENDIAN_H__ #define PORTABLE_ENDIAN_H__ #if (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) && !defined(__WINDOWS__) # define __WINDOWS__ #endif #if defined(__linux__) || defined(__CYGWIN__) # define __USE_BSD # include #elif defined(__APPLE__) # include # define htobe16(x) OSSwapHostToBigInt16(x) # define htole16(x) OSSwapHostToLittleInt16(x) # define be16toh(x) OSSwapBigToHostInt16(x) # define le16toh(x) OSSwapLittleToHostInt16(x) # define htobe32(x) OSSwapHostToBigInt32(x) # define htole32(x) OSSwapHostToLittleInt32(x) # define be32toh(x) OSSwapBigToHostInt32(x) # define le32toh(x) OSSwapLittleToHostInt32(x) # define htobe64(x) OSSwapHostToBigInt64(x) # define htole64(x) OSSwapHostToLittleInt64(x) # define be64toh(x) OSSwapBigToHostInt64(x) # define le64toh(x) OSSwapLittleToHostInt64(x) # define __BYTE_ORDER BYTE_ORDER # define __BIG_ENDIAN BIG_ENDIAN # define __LITTLE_ENDIAN LITTLE_ENDIAN # define __PDP_ENDIAN PDP_ENDIAN #elif defined(__OpenBSD__) # include #elif defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) # include # define be16toh(x) betoh16(x) # define le16toh(x) letoh16(x) # define be32toh(x) betoh32(x) # define le32toh(x) letoh32(x) # define be64toh(x) betoh64(x) # define le64toh(x) letoh64(x) #elif defined(__WINDOWS__) # include # include # if BYTE_ORDER == LITTLE_ENDIAN # define htobe16(x) htons(x) # define htole16(x) (x) # define be16toh(x) ntohs(x) # define le16toh(x) (x) # define htobe32(x) htonl(x) # define htole32(x) (x) # define be32toh(x) ntohl(x) # define le32toh(x) (x) # define htobe64(x) htonll(x) # define htole64(x) (x) # define be64toh(x) ntohll(x) # define le64toh(x) (x) # elif BYTE_ORDER == BIG_ENDIAN /* that would be xbox 360 */ # define htobe16(x) (x) # define htole16(x) __builtin_bswap16(x) # define be16toh(x) (x) # define le16toh(x) __builtin_bswap16(x) # define htobe32(x) (x) # define htole32(x) __builtin_bswap32(x) # define be32toh(x) (x) # define le32toh(x) __builtin_bswap32(x) # 
define htobe64(x) (x) # define htole64(x) __builtin_bswap64(x) # define be64toh(x) (x) # define le64toh(x) __builtin_bswap64(x) # else # error byte order not supported # endif # define __BYTE_ORDER BYTE_ORDER # define __BIG_ENDIAN BIG_ENDIAN # define __LITTLE_ENDIAN LITTLE_ENDIAN # define __PDP_ENDIAN PDP_ENDIAN #else # error platform not supported #endif #endif hat-trie-0.1.2/src/pstdint.h000066400000000000000000000662771312163177500157040ustar00rootroot00000000000000/* A portable stdint.h **************************************************************************** * BSD License: **************************************************************************** * * Copyright (c) 2005-2014 Paul Hsieh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************** * * Version 0.1.14 * * The ANSI C standard committee, for the C99 standard, specified the * inclusion of a new standard include file called stdint.h. This is * a very useful and long desired include file which contains several * very precise definitions for integer scalar types that is * critically important for making portable several classes of * applications including cryptography, hashing, variable length * integer libraries and so on. But for most developers its likely * useful just for programming sanity. * * The problem is that most compiler vendors have decided not to * implement the C99 standard, and the next C++ language standard * (which has a lot more mindshare these days) will be a long time in * coming and its unknown whether or not it will include stdint.h or * how much adoption it will have. Either way, it will be a long time * before all compilers come with a stdint.h and it also does nothing * for the extremely large number of compilers available today which * do not include this file, or anything comparable to it. * * So that's what this file is all about. Its an attempt to build a * single universal include file that works on as many platforms as * possible to deliver what stdint.h is supposed to. A few things * that should be noted about this file: * * 1) It is not guaranteed to be portable and/or present an identical * interface on all platforms. 
The extreme variability of the * ANSI C standard makes this an impossibility right from the * very get go. Its really only meant to be useful for the vast * majority of platforms that possess the capability of * implementing usefully and precisely defined, standard sized * integer scalars. Systems which are not intrinsically 2s * complement may produce invalid constants. * * 2) There is an unavoidable use of non-reserved symbols. * * 3) Other standard include files are invoked. * * 4) This file may come in conflict with future platforms that do * include stdint.h. The hope is that one or the other can be * used with no real difference. * * 5) In the current verison, if your platform can't represent * int32_t, int16_t and int8_t, it just dumps out with a compiler * error. * * 6) 64 bit integers may or may not be defined. Test for their * presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX. * Note that this is different from the C99 specification which * requires the existence of 64 bit support in the compiler. If * this is not defined for your platform, yet it is capable of * dealing with 64 bits then it is because this file has not yet * been extended to cover all of your system's capabilities. * * 7) (u)intptr_t may or may not be defined. Test for its presence * with the test: #ifdef PTRDIFF_MAX. If this is not defined * for your platform, then it is because this file has not yet * been extended to cover all of your system's capabilities, not * because its optional. * * 8) The following might not been defined even if your platform is * capable of defining it: * * WCHAR_MIN * WCHAR_MAX * (u)int64_t * PTRDIFF_MIN * PTRDIFF_MAX * (u)intptr_t * * 9) The following have not been defined: * * WINT_MIN * WINT_MAX * * 10) The criteria for defining (u)int_least(*)_t isn't clear, * except for systems which don't have a type that precisely * defined 8, 16, or 32 bit types (which this include file does * not support anyways). Default definitions have been given. 
* * 11) The criteria for defining (u)int_fast(*)_t isn't something I * would trust to any particular compiler vendor or the ANSI C * committee. It is well known that "compatible systems" are * commonly created that have very different performance * characteristics from the systems they are compatible with, * especially those whose vendors make both the compiler and the * system. Default definitions have been given, but its strongly * recommended that users never use these definitions for any * reason (they do *NOT* deliver any serious guarantee of * improved performance -- not in this file, nor any vendor's * stdint.h). * * 12) The following macros: * * PRINTF_INTMAX_MODIFIER * PRINTF_INT64_MODIFIER * PRINTF_INT32_MODIFIER * PRINTF_INT16_MODIFIER * PRINTF_LEAST64_MODIFIER * PRINTF_LEAST32_MODIFIER * PRINTF_LEAST16_MODIFIER * PRINTF_INTPTR_MODIFIER * * are strings which have been defined as the modifiers required * for the "d", "u" and "x" printf formats to correctly output * (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t, * (u)least32_t, (u)least16_t and (u)intptr_t types respectively. * PRINTF_INTPTR_MODIFIER is not defined for some systems which * provide their own stdint.h. PRINTF_INT64_MODIFIER is not * defined if INT64_MAX is not defined. These are an extension * beyond what C99 specifies must be in stdint.h. * * In addition, the following macros are defined: * * PRINTF_INTMAX_HEX_WIDTH * PRINTF_INT64_HEX_WIDTH * PRINTF_INT32_HEX_WIDTH * PRINTF_INT16_HEX_WIDTH * PRINTF_INT8_HEX_WIDTH * PRINTF_INTMAX_DEC_WIDTH * PRINTF_INT64_DEC_WIDTH * PRINTF_INT32_DEC_WIDTH * PRINTF_INT16_DEC_WIDTH * PRINTF_INT8_DEC_WIDTH * * Which specifies the maximum number of characters required to * print the number of that type in either hexadecimal or decimal. * These are an extension beyond what C99 specifies must be in * stdint.h. 
* * Compilers tested (all with 0 warnings at their highest respective * settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32 * bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio * .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3 * * This file should be considered a work in progress. Suggestions for * improvements, especially those which increase coverage are strongly * encouraged. * * Acknowledgements * * The following people have made significant contributions to the * development and testing of this file: * * Chris Howie * John Steele Scott * Dave Thorup * John Dill * Florian Wobbe * Christopher Sean Morrison * */ #include #include #include /* * For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and * do nothing else. On the Mac OS X version of gcc this is _STDINT_H_. */ #if ((defined(__STDC__) && __STDC__ && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (__GNUC__ > 3 || defined(_STDINT_H) || defined(_STDINT_H_) || defined (__UINT_FAST64_TYPE__)) )) && !defined (_PSTDINT_H_INCLUDED) #include #define _PSTDINT_H_INCLUDED # if defined(__GNUC__) && (defined(__x86_64__) || defined(__ppc64__)) # ifndef PRINTF_INT64_MODIFIER # define PRINTF_INT64_MODIFIER "l" # endif # ifndef PRINTF_INT32_MODIFIER # define PRINTF_INT32_MODIFIER "" # endif # else # ifndef PRINTF_INT64_MODIFIER # define PRINTF_INT64_MODIFIER "ll" # endif # ifndef PRINTF_INT32_MODIFIER # define PRINTF_INT32_MODIFIER "l" # endif # endif # ifndef PRINTF_INT16_MODIFIER # define PRINTF_INT16_MODIFIER "h" # endif # ifndef PRINTF_INTMAX_MODIFIER # define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER # endif # ifndef PRINTF_INT64_HEX_WIDTH # define PRINTF_INT64_HEX_WIDTH "16" # endif # ifndef PRINTF_INT32_HEX_WIDTH # define PRINTF_INT32_HEX_WIDTH "8" # endif # ifndef PRINTF_INT16_HEX_WIDTH # define PRINTF_INT16_HEX_WIDTH "4" # endif # ifndef 
PRINTF_INT8_HEX_WIDTH # define PRINTF_INT8_HEX_WIDTH "2" # endif # ifndef PRINTF_INT64_DEC_WIDTH # define PRINTF_INT64_DEC_WIDTH "20" # endif # ifndef PRINTF_INT32_DEC_WIDTH # define PRINTF_INT32_DEC_WIDTH "10" # endif # ifndef PRINTF_INT16_DEC_WIDTH # define PRINTF_INT16_DEC_WIDTH "5" # endif # ifndef PRINTF_INT8_DEC_WIDTH # define PRINTF_INT8_DEC_WIDTH "3" # endif # ifndef PRINTF_INTMAX_HEX_WIDTH # define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH # endif # ifndef PRINTF_INTMAX_DEC_WIDTH # define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH # endif /* * Something really weird is going on with Open Watcom. Just pull some of * these duplicated definitions from Open Watcom's stdint.h file for now. */ # if defined (__WATCOMC__) && __WATCOMC__ >= 1250 # if !defined (INT64_C) # define INT64_C(x) (x + (INT64_MAX - INT64_MAX)) # endif # if !defined (UINT64_C) # define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX)) # endif # if !defined (INT32_C) # define INT32_C(x) (x + (INT32_MAX - INT32_MAX)) # endif # if !defined (UINT32_C) # define UINT32_C(x) (x + (UINT32_MAX - UINT32_MAX)) # endif # if !defined (INT16_C) # define INT16_C(x) (x) # endif # if !defined (UINT16_C) # define UINT16_C(x) (x) # endif # if !defined (INT8_C) # define INT8_C(x) (x) # endif # if !defined (UINT8_C) # define UINT8_C(x) (x) # endif # if !defined (UINT64_MAX) # define UINT64_MAX 18446744073709551615ULL # endif # if !defined (INT64_MAX) # define INT64_MAX 9223372036854775807LL # endif # if !defined (UINT32_MAX) # define UINT32_MAX 4294967295UL # endif # if !defined (INT32_MAX) # define INT32_MAX 2147483647L # endif # if !defined (INTMAX_MAX) # define INTMAX_MAX INT64_MAX # endif # if !defined (INTMAX_MIN) # define INTMAX_MIN INT64_MIN # endif # endif #endif #ifndef _PSTDINT_H_INCLUDED #define _PSTDINT_H_INCLUDED #ifndef SIZE_MAX # define SIZE_MAX (~(size_t)0) #endif /* * Deduce the type assignments from limits.h under the assumption that * integer sizes in bits are powers of 2, and follow the 
ANSI * definitions. */ #ifndef UINT8_MAX # define UINT8_MAX 0xff #endif #if !defined(uint8_t) && !defined(_UINT8_T) # if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S) typedef unsigned char uint8_t; # define UINT8_C(v) ((uint8_t) v) # else # error "Platform not supported" # endif #endif #ifndef INT8_MAX # define INT8_MAX 0x7f #endif #ifndef INT8_MIN # define INT8_MIN INT8_C(0x80) #endif #if !defined(int8_t) && !defined(_INT8_T) # if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S) typedef signed char int8_t; # define INT8_C(v) ((int8_t) v) # else # error "Platform not supported" # endif #endif #ifndef UINT16_MAX # define UINT16_MAX 0xffff #endif #if !defined(uint16_t) && !defined(_UINT16_T) #if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S) typedef unsigned int uint16_t; # ifndef PRINTF_INT16_MODIFIER # define PRINTF_INT16_MODIFIER "" # endif # define UINT16_C(v) ((uint16_t) (v)) #elif (USHRT_MAX == UINT16_MAX) typedef unsigned short uint16_t; # define UINT16_C(v) ((uint16_t) (v)) # ifndef PRINTF_INT16_MODIFIER # define PRINTF_INT16_MODIFIER "h" # endif #else #error "Platform not supported" #endif #endif #ifndef INT16_MAX # define INT16_MAX 0x7fff #endif #ifndef INT16_MIN # define INT16_MIN INT16_C(0x8000) #endif #if !defined(int16_t) && !defined(_INT16_T) #if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S) typedef signed int int16_t; # define INT16_C(v) ((int16_t) (v)) # ifndef PRINTF_INT16_MODIFIER # define PRINTF_INT16_MODIFIER "" # endif #elif (SHRT_MAX == INT16_MAX) typedef signed short int16_t; # define INT16_C(v) ((int16_t) (v)) # ifndef PRINTF_INT16_MODIFIER # define PRINTF_INT16_MODIFIER "h" # endif #else #error "Platform not supported" #endif #endif #ifndef UINT32_MAX # define UINT32_MAX (0xffffffffUL) #endif #if !defined(uint32_t) && !defined(_UINT32_T) #if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S) typedef unsigned long uint32_t; # define UINT32_C(v) v ## UL # ifndef PRINTF_INT32_MODIFIER # define PRINTF_INT32_MODIFIER "l" # endif #elif 
(UINT_MAX == UINT32_MAX) typedef unsigned int uint32_t; # ifndef PRINTF_INT32_MODIFIER # define PRINTF_INT32_MODIFIER "" # endif # define UINT32_C(v) v ## U #elif (USHRT_MAX == UINT32_MAX) typedef unsigned short uint32_t; # define UINT32_C(v) ((unsigned short) (v)) # ifndef PRINTF_INT32_MODIFIER # define PRINTF_INT32_MODIFIER "" # endif #else #error "Platform not supported" #endif #endif #ifndef INT32_MAX # define INT32_MAX (0x7fffffffL) #endif #ifndef INT32_MIN # define INT32_MIN INT32_C(0x80000000) #endif #if !defined(int32_t) && !defined(_INT32_T) #if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S) typedef signed long int32_t; # define INT32_C(v) v ## L # ifndef PRINTF_INT32_MODIFIER # define PRINTF_INT32_MODIFIER "l" # endif #elif (INT_MAX == INT32_MAX) typedef signed int int32_t; # define INT32_C(v) v # ifndef PRINTF_INT32_MODIFIER # define PRINTF_INT32_MODIFIER "" # endif #elif (SHRT_MAX == INT32_MAX) typedef signed short int32_t; # define INT32_C(v) ((short) (v)) # ifndef PRINTF_INT32_MODIFIER # define PRINTF_INT32_MODIFIER "" # endif #else #error "Platform not supported" #endif #endif /* * The macro stdint_int64_defined is temporarily used to record * whether or not 64 integer support is available. It must be * defined for any 64 integer extensions for new platforms that are * added. 
*/ #undef stdint_int64_defined #if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S) # if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined (S_SPLINT_S) # define stdint_int64_defined typedef long long int64_t; typedef unsigned long long uint64_t; # define UINT64_C(v) v ## ULL # define INT64_C(v) v ## LL # ifndef PRINTF_INT64_MODIFIER # define PRINTF_INT64_MODIFIER "ll" # endif # endif #endif #if !defined (stdint_int64_defined) # if defined(__GNUC__) # define stdint_int64_defined __extension__ typedef long long int64_t; __extension__ typedef unsigned long long uint64_t; # define UINT64_C(v) v ## ULL # define INT64_C(v) v ## LL # ifndef PRINTF_INT64_MODIFIER # define PRINTF_INT64_MODIFIER "ll" # endif # elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S) # define stdint_int64_defined typedef long long int64_t; typedef unsigned long long uint64_t; # define UINT64_C(v) v ## ULL # define INT64_C(v) v ## LL # ifndef PRINTF_INT64_MODIFIER # define PRINTF_INT64_MODIFIER "ll" # endif # elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC) # define stdint_int64_defined typedef __int64 int64_t; typedef unsigned __int64 uint64_t; # define UINT64_C(v) v ## UI64 # define INT64_C(v) v ## I64 # ifndef PRINTF_INT64_MODIFIER # define PRINTF_INT64_MODIFIER "I64" # endif # endif #endif #if !defined (LONG_LONG_MAX) && defined (INT64_C) # define LONG_LONG_MAX INT64_C (9223372036854775807) #endif #ifndef ULONG_LONG_MAX # define ULONG_LONG_MAX UINT64_C (18446744073709551615) #endif #if !defined (INT64_MAX) && defined (INT64_C) # define INT64_MAX INT64_C (9223372036854775807) #endif #if !defined (INT64_MIN) && defined (INT64_C) # define INT64_MIN INT64_C (-9223372036854775808) #endif #if !defined 
(UINT64_MAX) && defined (INT64_C) # define UINT64_MAX UINT64_C (18446744073709551615) #endif /* * Width of hexadecimal for number field. */ #ifndef PRINTF_INT64_HEX_WIDTH # define PRINTF_INT64_HEX_WIDTH "16" #endif #ifndef PRINTF_INT32_HEX_WIDTH # define PRINTF_INT32_HEX_WIDTH "8" #endif #ifndef PRINTF_INT16_HEX_WIDTH # define PRINTF_INT16_HEX_WIDTH "4" #endif #ifndef PRINTF_INT8_HEX_WIDTH # define PRINTF_INT8_HEX_WIDTH "2" #endif #ifndef PRINTF_INT64_DEC_WIDTH # define PRINTF_INT64_DEC_WIDTH "20" #endif #ifndef PRINTF_INT32_DEC_WIDTH # define PRINTF_INT32_DEC_WIDTH "10" #endif #ifndef PRINTF_INT16_DEC_WIDTH # define PRINTF_INT16_DEC_WIDTH "5" #endif #ifndef PRINTF_INT8_DEC_WIDTH # define PRINTF_INT8_DEC_WIDTH "3" #endif /* * Ok, lets not worry about 128 bit integers for now. Moore's law says * we don't need to worry about that until about 2040 at which point * we'll have bigger things to worry about. */ #ifdef stdint_int64_defined typedef int64_t intmax_t; typedef uint64_t uintmax_t; # define INTMAX_MAX INT64_MAX # define INTMAX_MIN INT64_MIN # define UINTMAX_MAX UINT64_MAX # define UINTMAX_C(v) UINT64_C(v) # define INTMAX_C(v) INT64_C(v) # ifndef PRINTF_INTMAX_MODIFIER # define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER # endif # ifndef PRINTF_INTMAX_HEX_WIDTH # define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH # endif # ifndef PRINTF_INTMAX_DEC_WIDTH # define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH # endif #else typedef int32_t intmax_t; typedef uint32_t uintmax_t; # define INTMAX_MAX INT32_MAX # define UINTMAX_MAX UINT32_MAX # define UINTMAX_C(v) UINT32_C(v) # define INTMAX_C(v) INT32_C(v) # ifndef PRINTF_INTMAX_MODIFIER # define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER # endif # ifndef PRINTF_INTMAX_HEX_WIDTH # define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH # endif # ifndef PRINTF_INTMAX_DEC_WIDTH # define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH # endif #endif /* * Because this file currently only supports platforms which have * 
precise powers of 2 as bit sizes for the default integers, the * least definitions are all trivial. Its possible that a future * version of this file could have different definitions. */ #ifndef stdint_least_defined typedef int8_t int_least8_t; typedef uint8_t uint_least8_t; typedef int16_t int_least16_t; typedef uint16_t uint_least16_t; typedef int32_t int_least32_t; typedef uint32_t uint_least32_t; # define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER # define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER # define UINT_LEAST8_MAX UINT8_MAX # define INT_LEAST8_MAX INT8_MAX # define UINT_LEAST16_MAX UINT16_MAX # define INT_LEAST16_MAX INT16_MAX # define UINT_LEAST32_MAX UINT32_MAX # define INT_LEAST32_MAX INT32_MAX # define INT_LEAST8_MIN INT8_MIN # define INT_LEAST16_MIN INT16_MIN # define INT_LEAST32_MIN INT32_MIN # ifdef stdint_int64_defined typedef int64_t int_least64_t; typedef uint64_t uint_least64_t; # define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER # define UINT_LEAST64_MAX UINT64_MAX # define INT_LEAST64_MAX INT64_MAX # define INT_LEAST64_MIN INT64_MIN # endif #endif #undef stdint_least_defined /* * The ANSI C committee pretending to know or specify anything about * performance is the epitome of misguided arrogance. The mandate of * this file is to *ONLY* ever support that absolute minimum * definition of the fast integer types, for compatibility purposes. * No extensions, and no attempt to suggest what may or may not be a * faster integer type will ever be made in this file. Developers are * warned to stay away from these types when using this or any other * stdint.h. 
*/ typedef int_least8_t int_fast8_t; typedef uint_least8_t uint_fast8_t; typedef int_least16_t int_fast16_t; typedef uint_least16_t uint_fast16_t; typedef int_least32_t int_fast32_t; typedef uint_least32_t uint_fast32_t; #define UINT_FAST8_MAX UINT_LEAST8_MAX #define INT_FAST8_MAX INT_LEAST8_MAX #define UINT_FAST16_MAX UINT_LEAST16_MAX #define INT_FAST16_MAX INT_LEAST16_MAX #define UINT_FAST32_MAX UINT_LEAST32_MAX #define INT_FAST32_MAX INT_LEAST32_MAX #define INT_FAST8_MIN INT_LEAST8_MIN #define INT_FAST16_MIN INT_LEAST16_MIN #define INT_FAST32_MIN INT_LEAST32_MIN #ifdef stdint_int64_defined typedef int_least64_t int_fast64_t; typedef uint_least64_t uint_fast64_t; # define UINT_FAST64_MAX UINT_LEAST64_MAX # define INT_FAST64_MAX INT_LEAST64_MAX # define INT_FAST64_MIN INT_LEAST64_MIN #endif #undef stdint_int64_defined /* * Whatever piecemeal, per compiler thing we can do about the wchar_t * type limits. */ #if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__) # include # ifndef WCHAR_MIN # define WCHAR_MIN 0 # endif # ifndef WCHAR_MAX # define WCHAR_MAX ((wchar_t)-1) # endif #endif /* * Whatever piecemeal, per compiler/platform thing we can do about the * (u)intptr_t types and limits. */ #if (defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED)) || defined (_UINTPTR_T) # define STDINT_H_UINTPTR_T_DEFINED #endif #ifndef STDINT_H_UINTPTR_T_DEFINED # if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64) || defined (__ppc64__) # define stdint_intptr_bits 64 # elif defined (__WATCOMC__) || defined (__TURBOC__) # if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__) # define stdint_intptr_bits 16 # else # define stdint_intptr_bits 32 # endif # elif defined (__i386__) || defined (_WIN32) || defined (WIN32) || defined (__ppc64__) # define stdint_intptr_bits 32 # elif defined (__INTEL_COMPILER) /* TODO -- what did Intel do about x86-64? 
*/ # else /* #error "This platform might not be supported yet" */ # endif # ifdef stdint_intptr_bits # define stdint_intptr_glue3_i(a,b,c) a##b##c # define stdint_intptr_glue3(a,b,c) stdint_intptr_glue3_i(a,b,c) # ifndef PRINTF_INTPTR_MODIFIER # define PRINTF_INTPTR_MODIFIER stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER) # endif # ifndef PTRDIFF_MAX # define PTRDIFF_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX) # endif # ifndef PTRDIFF_MIN # define PTRDIFF_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN) # endif # ifndef UINTPTR_MAX # define UINTPTR_MAX stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX) # endif # ifndef INTPTR_MAX # define INTPTR_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX) # endif # ifndef INTPTR_MIN # define INTPTR_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN) # endif # ifndef INTPTR_C # define INTPTR_C(x) stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x) # endif # ifndef UINTPTR_C # define UINTPTR_C(x) stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x) # endif typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t; typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t) intptr_t; # else /* TODO -- This following is likely wrong for some platforms, and does nothing for the definition of uintptr_t. */ typedef ptrdiff_t intptr_t; # endif # define STDINT_H_UINTPTR_T_DEFINED #endif /* * Assumes sig_atomic_t is signed and we have a 2s complement machine. */ #ifndef SIG_ATOMIC_MAX # define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1) #endif #endif #if defined (__TEST_PSTDINT_FOR_CORRECTNESS) /* * Please compile with the maximum warning settings to make sure macros are not * defined more than once. 
*/ #include #include #include #define glue3_aux(x,y,z) x ## y ## z #define glue3(x,y,z) glue3_aux(x,y,z) #define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,) = glue3(UINT,bits,_C) (0); #define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,) = glue3(INT,bits,_C) (0); #define DECL(us,bits) glue3(DECL,us,) (bits) #define TESTUMAX(bits) glue3(u,bits,) = ~glue3(u,bits,); if (glue3(UINT,bits,_MAX) != glue3(u,bits,)) printf ("Something wrong with UINT%d_MAX\n", bits) int main () { DECL(I,8) DECL(U,8) DECL(I,16) DECL(U,16) DECL(I,32) DECL(U,32) #ifdef INT64_MAX DECL(I,64) DECL(U,64) #endif intmax_t imax = INTMAX_C(0); uintmax_t umax = UINTMAX_C(0); char str0[256], str1[256]; sprintf (str0, "%d %x\n", 0, ~0); sprintf (str1, "%d %x\n", i8, ~0); if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1); sprintf (str1, "%u %x\n", u8, ~0); if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1); sprintf (str1, "%d %x\n", i16, ~0); if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1); sprintf (str1, "%u %x\n", u16, ~0); if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1); sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n", i32, ~0); if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1); sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n", u32, ~0); if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1); #ifdef INT64_MAX sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n", i64, ~0); if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1); #endif sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n", imax, ~0); if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1); sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n", umax, ~0); if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1); TESTUMAX(8); TESTUMAX(16); TESTUMAX(32); #ifdef INT64_MAX TESTUMAX(64); #endif return 
/*
 * Simple random string generation.
 *
 * Writes len random printable ASCII characters (0x20 through 0x7e) into
 * x[0..len-1] and a terminating NUL into x[len], so x must provide room
 * for len + 1 bytes. Characters are produced back to front, one rand()
 * draw per character.
 */
void randstr(char* x, size_t len)
{
    const int span = '\x7e' - '\x20' + 1;
    char* cursor = x + len;

    *cursor = '\0';
    while (cursor != x) {
        --cursor;
        *cursor = '\x20' + (rand() % span);
    }
}
(%0.2f seconds)\n", (double) (t - t0) / (double) CLOCKS_PER_SEC); /* iterate in sorted order */ fprintf(stderr, "iterating in order ... "); t0 = clock(); for (r = 0; r < repetitions; ++r) { it = hattrie_iter_begin(T, true); while (!hattrie_iter_finished(it)) { hattrie_iter_next(it); } hattrie_iter_free(it); } t = clock(); fprintf(stderr, "finished. (%0.2f seconds)\n", (double) (t - t0) / (double) CLOCKS_PER_SEC); hattrie_free(T); return 0; } hat-trie-0.1.2/test/check_ahtable.c000066400000000000000000000135751312163177500171300ustar00rootroot00000000000000 #include #include #include #include "str_map.h" #include "../src/ahtable.h" /* Simple random string generation. */ void randstr(char* x, size_t len) { x[len] = '\0'; while (len > 0) { x[--len] = '\x20' + (rand() % ('\x7e' - '\x20' + 1)); } } const size_t n = 100000; // how many unique strings const size_t m_low = 50; // minimum length of each string const size_t m_high = 500; // maximum length of each string const size_t k = 200000; // number of insertions char** xs; ahtable_t* T; str_map* M; void setup() { fprintf(stderr, "generating %zu keys ... ", n); xs = malloc(n * sizeof(char*)); size_t i; size_t m; for (i = 0; i < n; ++i) { m = m_low + rand() % (m_high - m_low); xs[i] = malloc(m + 1); randstr(xs[i], m); } T = ahtable_create(); M = str_map_create(); fprintf(stderr, "done.\n"); } void teardown() { ahtable_free(T); str_map_destroy(M); size_t i; for (i = 0; i < n; ++i) { free(xs[i]); } free(xs); } void test_ahtable_insert() { fprintf(stderr, "inserting %zu keys ... 
\n", k); size_t i, j; value_t* u; value_t v; for (j = 0; j < k; ++j) { i = rand() % n; v = 1 + str_map_get(M, xs[i], strlen(xs[i])); str_map_set(M, xs[i], strlen(xs[i]), v); u = ahtable_get(T, xs[i], strlen(xs[i])); *u += 1; if (*u != v) { fprintf(stderr, "[error] tally mismatch (reported: %lu, correct: %lu)\n", *u, v); } } fprintf(stderr, "sizeof: %zu\n", ahtable_sizeof(T)); /* delete some keys */ for (j = 0; i < k/100; ++j) { i = rand() % n; ahtable_del(T, xs[i], strlen(xs[i])); str_map_del(M, xs[i], strlen(xs[i])); u = ahtable_tryget(T, xs[i], strlen(xs[i])); if (u) { fprintf(stderr, "[error] deleted node found in ahtable\n"); } } fprintf(stderr, "done.\n"); } void test_ahtable_iteration() { fprintf(stderr, "iterating through %zu keys ... \n", k); ahtable_iter_t* i = ahtable_iter_begin(T, false); size_t count = 0; value_t* u; value_t v; size_t len; const char* key; while (!ahtable_iter_finished(i)) { ++count; key = ahtable_iter_key(i, &len); u = ahtable_iter_val(i); v = str_map_get(M, key, len); if (*u != v) { if (v == 0) { fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v); } else { fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v); } } // this way we will see an error if the same key is iterated through // twice str_map_set(M, key, len, 0); ahtable_iter_next(i); } if (count != M->m) { fprintf(stderr, "[error] iterated through %zu element, expected %zu\n", count, M->m); } ahtable_iter_free(i); fprintf(stderr, "done.\n"); } int cmpkey(const char* a, size_t ka, const char* b, size_t kb) { int c = memcmp(a, b, ka < kb ? ka : kb); return c == 0 ? (int) ka - (int) kb : c; } void test_ahtable_sorted_iteration() { fprintf(stderr, "iterating in order through %zu keys ... 
\n", k); ahtable_iter_t* i = ahtable_iter_begin(T, true); size_t count = 0; value_t* u; value_t v; char* prev_key = malloc(m_high + 1); size_t prev_len = 0; const char *key = NULL; size_t len = 0; while (!ahtable_iter_finished(i)) { memcpy(prev_key, key, len); prev_len = len; ++count; key = ahtable_iter_key(i, &len); if (prev_key != NULL && cmpkey(prev_key, prev_len, key, len) > 0) { fprintf(stderr, "[error] iteration is not correctly ordered.\n"); } u = ahtable_iter_val(i); v = str_map_get(M, key, len); if (*u != v) { if (v == 0) { fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v); } else { fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v); } } // this way we will see an error if the same key is iterated through // twice str_map_set(M, key, len, 0); ahtable_iter_next(i); } ahtable_iter_free(i); free(prev_key); fprintf(stderr, "done.\n"); } void test_ahtable_save_load() { fprintf(stderr, "saving ahtable ... \n"); FILE* fd_w = fopen("test.aht", "w"); ahtable_save(T, fd_w); fclose(fd_w); fprintf(stderr, "loading ahtable ... \n"); FILE* fd_r = fopen("test.aht", "r"); ahtable_t* U = ahtable_load(fd_r); fclose(fd_r); fprintf(stderr, "comparing ahtable ... 
\n"); ahtable_iter_t* i = ahtable_iter_begin(T, false); ahtable_iter_t* j = ahtable_iter_begin(U, false); const char *k1 = NULL; const char *k2 = NULL; value_t* v1; value_t* v2; size_t len1 = 0; size_t len2 = 0; while (!ahtable_iter_finished(i) && !ahtable_iter_finished(j)) { k1 = ahtable_iter_key(i, &len1); v1 = ahtable_iter_val(i); k2 = ahtable_iter_key(j, &len2); v2 = ahtable_iter_val(j); if(len1 != len2) { fprintf(stderr, "[error] key lengths don't match (%lu, %lu)\n", len1, len2); } if(*v1 != *v2) { fprintf(stderr, "[error] values don't match (%lu, %lu)\n", *v1, *v2); } ahtable_iter_next(i); ahtable_iter_next(j); } ahtable_iter_free(i); ahtable_iter_free(j); } int main() { setup(); test_ahtable_insert(); test_ahtable_iteration(); teardown(); setup(); test_ahtable_insert(); test_ahtable_sorted_iteration(); teardown(); setup(); test_ahtable_insert(); test_ahtable_save_load(); teardown(); return 0; } hat-trie-0.1.2/test/check_hattrie.c000066400000000000000000000131611312163177500171570ustar00rootroot00000000000000 #include #include #include #include "str_map.h" #include "../src/hat-trie.h" /* Simple random string generation. */ void randstr(char* x, size_t len) { x[len] = '\0'; while (len > 0) { x[--len] = '\x20' + (rand() % ('\x7e' - '\x20' + 1)); } } const size_t n = 100000; // how many unique strings const size_t m_low = 50; // minimum length of each string const size_t m_high = 500; // maximum length of each string const size_t k = 200000; // number of insertions const size_t d = 50000; char** xs; char** ds; hattrie_t* T; str_map* M; void setup() { fprintf(stderr, "generating %zu keys ... 
", n); xs = malloc(n * sizeof(char*)); ds = malloc(d * sizeof(char*)); size_t i; size_t m; for (i = 0; i < n; ++i) { m = m_low + rand() % (m_high - m_low); xs[i] = malloc(m + 1); randstr(xs[i], m); } for (i = 0; i < d; ++i) { m = rand()%n; ds[i] = xs[m]; } T = hattrie_create(); M = str_map_create(); fprintf(stderr, "done.\n"); } void teardown() { hattrie_free(T); str_map_destroy(M); size_t i; for (i = 0; i < n; ++i) { free(xs[i]); } free(xs); free(ds); } void test_hattrie_insert() { fprintf(stderr, "inserting %zu keys ... \n", k); size_t i, j; value_t* u; value_t v; for (j = 0; j < k; ++j) { i = rand() % n; v = 1 + str_map_get(M, xs[i], strlen(xs[i])); str_map_set(M, xs[i], strlen(xs[i]), v); u = hattrie_get(T, xs[i], strlen(xs[i])); *u += 1; if (*u != v) { fprintf(stderr, "[error] tally mismatch (reported: %lu, correct: %lu)\n", *u, v); } } fprintf(stderr, "sizeof: %zu\n", hattrie_sizeof(T)); fprintf(stderr, "deleting %zu keys ... \n", d); for (j = 0; j < d; ++j) { str_map_del(M, ds[j], strlen(ds[j])); hattrie_del(T, ds[j], strlen(ds[j])); u = hattrie_tryget(T, ds[j], strlen(ds[j])); if (u) { fprintf(stderr, "[error] item %zu still found in trie after delete\n", j); } } fprintf(stderr, "done.\n"); } void test_hattrie_iteration() { fprintf(stderr, "iterating through %zu keys ... 
/*
 * Three-way comparison of two byte strings given with explicit lengths.
 *
 * The first min(ka, kb) bytes are compared with memcmp(); if they are
 * equal the shorter key orders first.
 *
 * Returns a negative value if a orders before b, zero if the keys are
 * identical, and a positive value if a orders after b. Only the sign is
 * meaningful: a length tie-break yields -1 or 1 rather than the length
 * difference, which could truncate or overflow when squeezed into an int.
 */
int cmpkey(const char* a, size_t ka, const char* b, size_t kb)
{
    size_t shared = ka < kb ? ka : kb;

    /* Skip memcmp for an empty prefix so that a NULL pointer paired
     * with a zero length is never handed to it. */
    int c = shared > 0 ? memcmp(a, b, shared) : 0;
    if (c != 0) {
        return c;
    }

    return (ka > kb) - (ka < kb);
}
M->m) { fprintf(stderr, "[error] iterated through %zu element, expected %zu\n", count, M->m); } hattrie_iter_free(i); free(prev_key); free(key_copy); fprintf(stderr, "done.\n"); } void test_trie_non_ascii() { fprintf(stderr, "checking non-ascii... \n"); value_t* u; hattrie_t* T = hattrie_create(); char* txt = "\x81\x70"; u = hattrie_get(T, txt, strlen(txt)); *u = 10; u = hattrie_tryget(T, txt, strlen(txt)); if (*u != 10){ fprintf(stderr, "can't store non-ascii strings\n"); } hattrie_free(T); fprintf(stderr, "done.\n"); } int main() { test_trie_non_ascii(); setup(); test_hattrie_insert(); test_hattrie_iteration(); teardown(); setup(); test_hattrie_insert(); test_hattrie_sorted_iteration(); teardown(); return 0; } hat-trie-0.1.2/test/str_map.c000066400000000000000000000113241312163177500160260ustar00rootroot00000000000000 /* * This file is part of fastq-tools. * * Copyright (c) 2011 by Daniel C. Jones * */ #include "str_map.h" #include "misc.h" #include #include #include static const size_t INITIAL_TABLE_SIZE = 16; static const double MAX_LOAD = 0.77; /* * Paul Hsieh's SuperFastHash * http://www.azillionmonkeys.com/qed/hash.html */ #undef get16bits #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) #define get16bits(d) (*((const uint16_t *) (d))) #endif #if !defined (get16bits) #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\ +(uint32_t)(((const uint8_t *)(d))[0]) ) #endif static uint32_t hash(const char * data, size_t len) { uint32_t hash = len, tmp; int rem; if (len <= 0 || data == NULL) return 0; rem = len & 3; len >>= 2; /* Main loop */ for (;len > 0; len--) { hash += get16bits (data); tmp = (get16bits (data+2) << 11) ^ hash; hash = (hash << 16) ^ tmp; data += 2*sizeof (uint16_t); hash += hash >> 11; } /* Handle end cases */ switch (rem) { case 3: hash += get16bits (data); hash ^= hash << 16; hash ^= data[sizeof (uint16_t)] << 18; hash += hash >> 11; 
break; case 2: hash += get16bits (data); hash ^= hash << 11; hash += hash >> 17; break; case 1: hash += *data; hash ^= hash << 10; hash += hash >> 1; } /* Force "avalanching" of final 127 bits */ hash ^= hash << 3; hash += hash >> 5; hash ^= hash << 4; hash += hash >> 17; hash ^= hash << 25; hash += hash >> 6; return hash; } static void rehash(str_map* T, size_t new_n); static void clear(str_map*); str_map* str_map_create() { str_map* T = malloc_or_die(sizeof(str_map)); T->A = malloc_or_die(INITIAL_TABLE_SIZE * sizeof(str_map_pair*)); memset(T->A, 0, INITIAL_TABLE_SIZE * sizeof(str_map_pair*)); T->n = INITIAL_TABLE_SIZE; T->m = 0; T->max_m = T->n * MAX_LOAD; return T; } void str_map_destroy(str_map* T) { if (T != NULL) { clear(T); free(T->A); free(T); } } void clear(str_map* T) { str_map_pair* u; size_t i; for (i = 0; i < T->n; i++) { while (T->A[i]) { u = T->A[i]->next; free(T->A[i]->key); free(T->A[i]); T->A[i] = u; } } T->m = 0; } static void insert_without_copy(str_map* T, str_map_pair* V) { uint32_t h = hash(V->key, V->keylen) % T->n; V->next = T->A[h]; T->A[h] = V; T->m++; } static void rehash(str_map* T, size_t new_n) { str_map U; U.n = new_n; U.m = 0; U.max_m = U.n * MAX_LOAD; U.A = malloc_or_die(U.n * sizeof(str_map_pair*)); memset(U.A, 0, U.n * sizeof(str_map_pair*)); str_map_pair *j, *k; size_t i; for (i = 0; i < T->n; i++) { j = T->A[i]; while (j) { k = j->next; insert_without_copy(&U, j); j = k; } T->A[i] = NULL; } free(T->A); T->A = U.A; T->n = U.n; T->max_m = U.max_m; } void str_map_set(str_map* T, const char* key, size_t keylen, value_t value) { if (T->m >= T->max_m) rehash(T, T->n * 2); uint32_t h = hash(key, keylen) % T->n; str_map_pair* u = T->A[h]; while (u) { if (u->keylen == keylen && memcmp(u->key, key, keylen) == 0) { u->value = value; return; } u = u->next; } u = malloc_or_die(sizeof(str_map_pair)); u->key = malloc_or_die(keylen); memcpy(u->key, key, keylen); u->keylen = keylen; u->value = value; u->next = T->A[h]; T->A[h] = u; T->m++; } 
/*
 * Remove the pair stored under the keylen-byte key from T, if any.
 *
 * The first pair in the key's bucket chain whose length and bytes match
 * is unlinked, its key copy and the pair itself are freed, and the item
 * count is decremented. Nothing happens when the key is not present.
 */
void str_map_del(str_map* T, const char* key, size_t keylen)
{
    /* link always addresses the pointer that refers to the pair under
     * inspection: the bucket head first, then each predecessor's next. */
    str_map_pair** link = &T->A[hash(key, keylen) % T->n];
    str_map_pair* victim;

    for (; (victim = *link) != NULL; link = &victim->next) {
        if (victim->keylen != keylen || memcmp(victim->key, key, keylen) != 0) {
            continue;
        }

        *link = victim->next;
        free(victim->key);
        free(victim);
        T->m -= 1;
        return;
    }
}