160484Sobrien/* An expandable hash tables datatype. 2218822Sdim Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004 3218822Sdim Free Software Foundation, Inc. 460484Sobrien Contributed by Vladimir Makarov (vmakarov@cygnus.com). 560484Sobrien 660484SobrienThis file is part of the libiberty library. 760484SobrienLibiberty is free software; you can redistribute it and/or 860484Sobrienmodify it under the terms of the GNU Library General Public 960484SobrienLicense as published by the Free Software Foundation; either 1060484Sobrienversion 2 of the License, or (at your option) any later version. 1160484Sobrien 1260484SobrienLibiberty is distributed in the hope that it will be useful, 1360484Sobrienbut WITHOUT ANY WARRANTY; without even the implied warranty of 1460484SobrienMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1560484SobrienLibrary General Public License for more details. 1660484Sobrien 1760484SobrienYou should have received a copy of the GNU Library General Public 1860484SobrienLicense along with libiberty; see the file COPYING.LIB. If 19218822Sdimnot, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor, 20218822SdimBoston, MA 02110-1301, USA. */ 2160484Sobrien 2260484Sobrien/* This package implements basic hash table functionality. It is possible 2360484Sobrien to search for an entry, create an entry and destroy an entry. 2460484Sobrien 2560484Sobrien Elements in the table are generic pointers. 2660484Sobrien 2760484Sobrien The size of the table is not fixed; if the occupancy of the table 2860484Sobrien grows too high the hash table will be expanded. 2960484Sobrien 3060484Sobrien The abstract data implementation is based on generalized Algorithm D 3160484Sobrien from Knuth's book "The art of computer programming". Hash table is 3260484Sobrien expanded by creation of new hash table and transferring elements from 3360484Sobrien the old table to the new table. */ 3460484Sobrien 3560484Sobrien#ifdef HAVE_CONFIG_H 3660484Sobrien#include "config.h" 3760484Sobrien#endif 3860484Sobrien 3960484Sobrien#include <sys/types.h> 4060484Sobrien 4160484Sobrien#ifdef HAVE_STDLIB_H 4260484Sobrien#include <stdlib.h> 4360484Sobrien#endif 4477298Sobrien#ifdef HAVE_STRING_H 4577298Sobrien#include <string.h> 4677298Sobrien#endif 47130561Sobrien#ifdef HAVE_MALLOC_H 48130561Sobrien#include <malloc.h> 49130561Sobrien#endif 50218822Sdim#ifdef HAVE_LIMITS_H 51218822Sdim#include <limits.h> 52218822Sdim#endif 53218822Sdim#ifdef HAVE_STDINT_H 54218822Sdim#include <stdint.h> 55218822Sdim#endif 56130561Sobrien 5760484Sobrien#include <stdio.h> 5860484Sobrien 5960484Sobrien#include "libiberty.h" 60218822Sdim#include "ansidecl.h" 6160484Sobrien#include "hashtab.h" 6260484Sobrien 63218822Sdim#ifndef CHAR_BIT 64218822Sdim#define CHAR_BIT 8 65218822Sdim#endif 6660484Sobrien 67218822Sdimstatic unsigned int higher_prime_index (unsigned long); 68218822Sdimstatic hashval_t htab_mod_1 (hashval_t, hashval_t, hashval_t, int); 69218822Sdimstatic hashval_t htab_mod (hashval_t, htab_t); 70218822Sdimstatic hashval_t htab_mod_m2 (hashval_t, htab_t); 71218822Sdimstatic hashval_t hash_pointer (const void *); 72218822Sdimstatic int eq_pointer (const void *, const void *); 73218822Sdimstatic int htab_expand (htab_t); 74218822Sdimstatic PTR *find_empty_slot_for_expand (htab_t, hashval_t); 7560484Sobrien 7677298Sobrien/* At some point, we could make these be NULL, and modify the 7777298Sobrien hash-table routines to handle NULL specially; that would avoid 7877298Sobrien function-call overhead for the common case of hashing pointers. */ 7977298Sobrienhtab_hash htab_hash_pointer = hash_pointer; 8077298Sobrienhtab_eq htab_eq_pointer = eq_pointer; 8177298Sobrien 82218822Sdim/* Table of primes and multiplicative inverses. 8377298Sobrien 84218822Sdim Note that these are not minimally reduced inverses. Unlike when generating 85218822Sdim code to divide by a constant, we want to be able to use the same algorithm 86218822Sdim all the time. All of these inverses (are implied to) have bit 32 set. 87218822Sdim 88218822Sdim For the record, here's the function that computed the table; it's a 89218822Sdim vastly simplified version of the function of the same name from gcc. */ 90218822Sdim 91218822Sdim#if 0 92218822Sdimunsigned int 93218822Sdimceil_log2 (unsigned int x) 9460484Sobrien{ 95218822Sdim int i; 96218822Sdim for (i = 31; i >= 0 ; --i) 97218822Sdim if (x > (1u << i)) 98218822Sdim return i+1; 99218822Sdim abort (); 100218822Sdim} 10160484Sobrien 102218822Sdimunsigned int 103218822Sdimchoose_multiplier (unsigned int d, unsigned int *mlp, unsigned char *shiftp) 104218822Sdim{ 105218822Sdim unsigned long long mhigh; 106218822Sdim double nx; 107218822Sdim int lgup, post_shift; 108218822Sdim int pow, pow2; 109218822Sdim int n = 32, precision = 32; 11060484Sobrien 111218822Sdim lgup = ceil_log2 (d); 112218822Sdim pow = n + lgup; 113218822Sdim pow2 = n + lgup - precision; 114218822Sdim 115218822Sdim nx = ldexp (1.0, pow) + ldexp (1.0, pow2); 116218822Sdim mhigh = nx / d; 117218822Sdim 118218822Sdim *shiftp = lgup - 1; 119218822Sdim *mlp = mhigh; 120218822Sdim return mhigh >> 32; 121218822Sdim} 122218822Sdim#endif 123218822Sdim 124218822Sdimstruct prime_ent 125218822Sdim{ 126218822Sdim hashval_t prime; 127218822Sdim hashval_t inv; 128218822Sdim hashval_t inv_m2; /* inverse of prime-2 */ 129218822Sdim hashval_t shift; 130218822Sdim}; 131218822Sdim 132218822Sdimstatic struct prime_ent const prime_tab[] = { 133218822Sdim { 7, 0x24924925, 0x9999999b, 2 }, 134218822Sdim { 13, 0x3b13b13c, 0x745d1747, 3 }, 135218822Sdim { 31, 0x08421085, 0x1a7b9612, 4 }, 136218822Sdim { 61, 0x0c9714fc, 0x15b1e5f8, 5 }, 137218822Sdim { 127, 0x02040811, 0x0624dd30, 6 }, 138218822Sdim { 251, 0x05197f7e, 0x073260a5, 7 }, 139218822Sdim { 509, 0x01824366, 0x02864fc8, 8 }, 140218822Sdim { 1021, 0x00c0906d, 0x014191f7, 9 }, 141218822Sdim { 2039, 0x0121456f, 0x0161e69e, 10 }, 142218822Sdim { 4093, 0x00300902, 0x00501908, 11 }, 143218822Sdim { 8191, 0x00080041, 0x00180241, 12 }, 144218822Sdim { 16381, 0x000c0091, 0x00140191, 13 }, 145218822Sdim { 32749, 0x002605a5, 0x002a06e6, 14 }, 146218822Sdim { 65521, 0x000f00e2, 0x00110122, 15 }, 147218822Sdim { 131071, 0x00008001, 0x00018003, 16 }, 148218822Sdim { 262139, 0x00014002, 0x0001c004, 17 }, 149218822Sdim { 524287, 0x00002001, 0x00006001, 18 }, 150218822Sdim { 1048573, 0x00003001, 0x00005001, 19 }, 151218822Sdim { 2097143, 0x00004801, 0x00005801, 20 }, 152218822Sdim { 4194301, 0x00000c01, 0x00001401, 21 }, 153218822Sdim { 8388593, 0x00001e01, 0x00002201, 22 }, 154218822Sdim { 16777213, 0x00000301, 0x00000501, 23 }, 155218822Sdim { 33554393, 0x00001381, 0x00001481, 24 }, 156218822Sdim { 67108859, 0x00000141, 0x000001c1, 25 }, 157218822Sdim { 134217689, 0x000004e1, 0x00000521, 26 }, 158218822Sdim { 268435399, 0x00000391, 0x000003b1, 27 }, 159218822Sdim { 536870909, 0x00000019, 0x00000029, 28 }, 160218822Sdim { 1073741789, 0x0000008d, 0x00000095, 29 }, 161218822Sdim { 2147483647, 0x00000003, 0x00000007, 30 }, 162218822Sdim /* Avoid "decimal constant so large it is unsigned" for 4294967291. */ 163218822Sdim { 0xfffffffb, 0x00000006, 0x00000008, 31 } 164218822Sdim}; 165218822Sdim 166218822Sdim/* The following function returns an index into the above table of the 167218822Sdim nearest prime number which is greater than N, and near a power of two. */ 168218822Sdim 169218822Sdimstatic unsigned int 170218822Sdimhigher_prime_index (unsigned long n) 171218822Sdim{ 172218822Sdim unsigned int low = 0; 173218822Sdim unsigned int high = sizeof(prime_tab) / sizeof(prime_tab[0]); 174218822Sdim 17577298Sobrien while (low != high) 17660484Sobrien { 177218822Sdim unsigned int mid = low + (high - low) / 2; 178218822Sdim if (n > prime_tab[mid].prime) 17977298Sobrien low = mid + 1; 18077298Sobrien else 18177298Sobrien high = mid; 18260484Sobrien } 18360484Sobrien 18477298Sobrien /* If we've run out of primes, abort. */ 185218822Sdim if (n > prime_tab[low].prime) 18677298Sobrien { 18777298Sobrien fprintf (stderr, "Cannot find prime bigger than %lu\n", n); 18877298Sobrien abort (); 18977298Sobrien } 19077298Sobrien 191218822Sdim return low; 19260484Sobrien} 19360484Sobrien 19477298Sobrien/* Returns a hash code for P. */ 19577298Sobrien 19677298Sobrienstatic hashval_t 197218822Sdimhash_pointer (const PTR p) 19877298Sobrien{ 19977298Sobrien return (hashval_t) ((long)p >> 3); 20077298Sobrien} 20177298Sobrien 20277298Sobrien/* Returns non-zero if P1 and P2 are equal. */ 20377298Sobrien 20477298Sobrienstatic int 205218822Sdimeq_pointer (const PTR p1, const PTR p2) 20677298Sobrien{ 20777298Sobrien return p1 == p2; 20877298Sobrien} 20977298Sobrien 210218822Sdim 211218822Sdim/* The parens around the function names in the next two definitions 212218822Sdim are essential in order to prevent macro expansions of the name. 213218822Sdim The bodies, however, are expanded as expected, so they are not 214218822Sdim recursive definitions. */ 215218822Sdim 216218822Sdim/* Return the current size of given hash table. */ 217218822Sdim 218218822Sdim#define htab_size(htab) ((htab)->size) 219218822Sdim 220218822Sdimsize_t 221218822Sdim(htab_size) (htab_t htab) 222218822Sdim{ 223218822Sdim return htab_size (htab); 224218822Sdim} 225218822Sdim 226218822Sdim/* Return the current number of elements in given hash table. */ 227218822Sdim 228218822Sdim#define htab_elements(htab) ((htab)->n_elements - (htab)->n_deleted) 229218822Sdim 230218822Sdimsize_t 231218822Sdim(htab_elements) (htab_t htab) 232218822Sdim{ 233218822Sdim return htab_elements (htab); 234218822Sdim} 235218822Sdim 236218822Sdim/* Return X % Y. */ 237218822Sdim 238218822Sdimstatic inline hashval_t 239218822Sdimhtab_mod_1 (hashval_t x, hashval_t y, hashval_t inv, int shift) 240218822Sdim{ 241218822Sdim /* The multiplicative inverses computed above are for 32-bit types, and 242218822Sdim requires that we be able to compute a highpart multiply. */ 243218822Sdim#ifdef UNSIGNED_64BIT_TYPE 244218822Sdim __extension__ typedef UNSIGNED_64BIT_TYPE ull; 245218822Sdim if (sizeof (hashval_t) * CHAR_BIT <= 32) 246218822Sdim { 247218822Sdim hashval_t t1, t2, t3, t4, q, r; 248218822Sdim 249218822Sdim t1 = ((ull)x * inv) >> 32; 250218822Sdim t2 = x - t1; 251218822Sdim t3 = t2 >> 1; 252218822Sdim t4 = t1 + t3; 253218822Sdim q = t4 >> shift; 254218822Sdim r = x - (q * y); 255218822Sdim 256218822Sdim return r; 257218822Sdim } 258218822Sdim#endif 259218822Sdim 260218822Sdim /* Otherwise just use the native division routines. */ 261218822Sdim return x % y; 262218822Sdim} 263218822Sdim 264218822Sdim/* Compute the primary hash for HASH given HTAB's current size. */ 265218822Sdim 266218822Sdimstatic inline hashval_t 267218822Sdimhtab_mod (hashval_t hash, htab_t htab) 268218822Sdim{ 269218822Sdim const struct prime_ent *p = &prime_tab[htab->size_prime_index]; 270218822Sdim return htab_mod_1 (hash, p->prime, p->inv, p->shift); 271218822Sdim} 272218822Sdim 273218822Sdim/* Compute the secondary hash for HASH given HTAB's current size. */ 274218822Sdim 275218822Sdimstatic inline hashval_t 276218822Sdimhtab_mod_m2 (hashval_t hash, htab_t htab) 277218822Sdim{ 278218822Sdim const struct prime_ent *p = &prime_tab[htab->size_prime_index]; 279218822Sdim return 1 + htab_mod_1 (hash, p->prime - 2, p->inv_m2, p->shift); 280218822Sdim} 281218822Sdim 28260484Sobrien/* This function creates table with length slightly longer than given 28360484Sobrien source length. Created hash table is initiated as empty (all the 284218822Sdim hash table entries are HTAB_EMPTY_ENTRY). The function returns the 285104834Sobrien created hash table, or NULL if memory allocation fails. */ 28660484Sobrien 28760484Sobrienhtab_t 288218822Sdimhtab_create_alloc (size_t size, htab_hash hash_f, htab_eq eq_f, 289218822Sdim htab_del del_f, htab_alloc alloc_f, htab_free free_f) 29060484Sobrien{ 29160484Sobrien htab_t result; 292218822Sdim unsigned int size_prime_index; 29360484Sobrien 294218822Sdim size_prime_index = higher_prime_index (size); 295218822Sdim size = prime_tab[size_prime_index].prime; 296218822Sdim 297104834Sobrien result = (htab_t) (*alloc_f) (1, sizeof (struct htab)); 298104834Sobrien if (result == NULL) 299104834Sobrien return NULL; 300104834Sobrien result->entries = (PTR *) (*alloc_f) (size, sizeof (PTR)); 301104834Sobrien if (result->entries == NULL) 302104834Sobrien { 303104834Sobrien if (free_f != NULL) 304104834Sobrien (*free_f) (result); 305104834Sobrien return NULL; 306104834Sobrien } 30760484Sobrien result->size = size; 308218822Sdim result->size_prime_index = size_prime_index; 30960484Sobrien result->hash_f = hash_f; 31060484Sobrien result->eq_f = eq_f; 31160484Sobrien result->del_f = del_f; 312104834Sobrien result->alloc_f = alloc_f; 313104834Sobrien result->free_f = free_f; 31460484Sobrien return result; 31560484Sobrien} 31660484Sobrien 317130561Sobrien/* As above, but use the variants of alloc_f and free_f which accept 318130561Sobrien an extra argument. */ 319130561Sobrien 320130561Sobrienhtab_t 321218822Sdimhtab_create_alloc_ex (size_t size, htab_hash hash_f, htab_eq eq_f, 322218822Sdim htab_del del_f, void *alloc_arg, 323218822Sdim htab_alloc_with_arg alloc_f, 324218822Sdim htab_free_with_arg free_f) 325130561Sobrien{ 326130561Sobrien htab_t result; 327218822Sdim unsigned int size_prime_index; 328130561Sobrien 329218822Sdim size_prime_index = higher_prime_index (size); 330218822Sdim size = prime_tab[size_prime_index].prime; 331218822Sdim 332130561Sobrien result = (htab_t) (*alloc_f) (alloc_arg, 1, sizeof (struct htab)); 333130561Sobrien if (result == NULL) 334130561Sobrien return NULL; 335130561Sobrien result->entries = (PTR *) (*alloc_f) (alloc_arg, size, sizeof (PTR)); 336130561Sobrien if (result->entries == NULL) 337130561Sobrien { 338130561Sobrien if (free_f != NULL) 339130561Sobrien (*free_f) (alloc_arg, result); 340130561Sobrien return NULL; 341130561Sobrien } 342130561Sobrien result->size = size; 343218822Sdim result->size_prime_index = size_prime_index; 344130561Sobrien result->hash_f = hash_f; 345130561Sobrien result->eq_f = eq_f; 346130561Sobrien result->del_f = del_f; 347130561Sobrien result->alloc_arg = alloc_arg; 348130561Sobrien result->alloc_with_arg_f = alloc_f; 349130561Sobrien result->free_with_arg_f = free_f; 350130561Sobrien return result; 351130561Sobrien} 352130561Sobrien 353130561Sobrien/* Update the function pointers and allocation parameter in the htab_t. */ 354130561Sobrien 355130561Sobrienvoid 356218822Sdimhtab_set_functions_ex (htab_t htab, htab_hash hash_f, htab_eq eq_f, 357218822Sdim htab_del del_f, PTR alloc_arg, 358218822Sdim htab_alloc_with_arg alloc_f, htab_free_with_arg free_f) 359130561Sobrien{ 360130561Sobrien htab->hash_f = hash_f; 361130561Sobrien htab->eq_f = eq_f; 362130561Sobrien htab->del_f = del_f; 363130561Sobrien htab->alloc_arg = alloc_arg; 364130561Sobrien htab->alloc_with_arg_f = alloc_f; 365130561Sobrien htab->free_with_arg_f = free_f; 366130561Sobrien} 367130561Sobrien 368104834Sobrien/* These functions exist solely for backward compatibility. */ 36977298Sobrien 370104834Sobrien#undef htab_create 37177298Sobrienhtab_t 372218822Sdimhtab_create (size_t size, htab_hash hash_f, htab_eq eq_f, htab_del del_f) 373104834Sobrien{ 374104834Sobrien return htab_create_alloc (size, hash_f, eq_f, del_f, xcalloc, free); 375104834Sobrien} 376104834Sobrien 377104834Sobrienhtab_t 378218822Sdimhtab_try_create (size_t size, htab_hash hash_f, htab_eq eq_f, htab_del del_f) 37977298Sobrien{ 380104834Sobrien return htab_create_alloc (size, hash_f, eq_f, del_f, calloc, free); 38177298Sobrien} 38277298Sobrien 38360484Sobrien/* This function frees all memory allocated for given hash table. 38460484Sobrien Naturally the hash table must already exist. */ 38560484Sobrien 38660484Sobrienvoid 387218822Sdimhtab_delete (htab_t htab) 38860484Sobrien{ 389218822Sdim size_t size = htab_size (htab); 390218822Sdim PTR *entries = htab->entries; 39160484Sobrien int i; 39277298Sobrien 39360484Sobrien if (htab->del_f) 394218822Sdim for (i = size - 1; i >= 0; i--) 395218822Sdim if (entries[i] != HTAB_EMPTY_ENTRY && entries[i] != HTAB_DELETED_ENTRY) 396218822Sdim (*htab->del_f) (entries[i]); 39760484Sobrien 398104834Sobrien if (htab->free_f != NULL) 399104834Sobrien { 400218822Sdim (*htab->free_f) (entries); 401104834Sobrien (*htab->free_f) (htab); 402104834Sobrien } 403130561Sobrien else if (htab->free_with_arg_f != NULL) 404130561Sobrien { 405218822Sdim (*htab->free_with_arg_f) (htab->alloc_arg, entries); 406130561Sobrien (*htab->free_with_arg_f) (htab->alloc_arg, htab); 407130561Sobrien } 40860484Sobrien} 40960484Sobrien 41060484Sobrien/* This function clears all entries in the given hash table. */ 41160484Sobrien 41260484Sobrienvoid 413218822Sdimhtab_empty (htab_t htab) 41460484Sobrien{ 415218822Sdim size_t size = htab_size (htab); 416218822Sdim PTR *entries = htab->entries; 41760484Sobrien int i; 41877298Sobrien 41960484Sobrien if (htab->del_f) 420218822Sdim for (i = size - 1; i >= 0; i--) 421218822Sdim if (entries[i] != HTAB_EMPTY_ENTRY && entries[i] != HTAB_DELETED_ENTRY) 422218822Sdim (*htab->del_f) (entries[i]); 42360484Sobrien 424218822Sdim /* Instead of clearing megabyte, downsize the table. */ 425218822Sdim if (size > 1024*1024 / sizeof (PTR)) 426218822Sdim { 427218822Sdim int nindex = higher_prime_index (1024 / sizeof (PTR)); 428218822Sdim int nsize = prime_tab[nindex].prime; 429218822Sdim 430218822Sdim if (htab->free_f != NULL) 431218822Sdim (*htab->free_f) (htab->entries); 432218822Sdim else if (htab->free_with_arg_f != NULL) 433218822Sdim (*htab->free_with_arg_f) (htab->alloc_arg, htab->entries); 434218822Sdim if (htab->alloc_with_arg_f != NULL) 435218822Sdim htab->entries = (PTR *) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize, 436218822Sdim sizeof (PTR *)); 437218822Sdim else 438218822Sdim htab->entries = (PTR *) (*htab->alloc_f) (nsize, sizeof (PTR *)); 439218822Sdim htab->size = nsize; 440218822Sdim htab->size_prime_index = nindex; 441218822Sdim } 442218822Sdim else 443218822Sdim memset (entries, 0, size * sizeof (PTR)); 444218822Sdim htab->n_deleted = 0; 445218822Sdim htab->n_elements = 0; 44660484Sobrien} 44760484Sobrien 44860484Sobrien/* Similar to htab_find_slot, but without several unwanted side effects: 44960484Sobrien - Does not call htab->eq_f when it finds an existing entry. 45060484Sobrien - Does not change the count of elements/searches/collisions in the 45160484Sobrien hash table. 45260484Sobrien This function also assumes there are no deleted entries in the table. 45360484Sobrien HASH is the hash value for the element to be inserted. */ 45477298Sobrien 45577298Sobrienstatic PTR * 456218822Sdimfind_empty_slot_for_expand (htab_t htab, hashval_t hash) 45760484Sobrien{ 458218822Sdim hashval_t index = htab_mod (hash, htab); 459218822Sdim size_t size = htab_size (htab); 460104834Sobrien PTR *slot = htab->entries + index; 461104834Sobrien hashval_t hash2; 46260484Sobrien 463218822Sdim if (*slot == HTAB_EMPTY_ENTRY) 464104834Sobrien return slot; 465218822Sdim else if (*slot == HTAB_DELETED_ENTRY) 466104834Sobrien abort (); 467104834Sobrien 468218822Sdim hash2 = htab_mod_m2 (hash, htab); 46960484Sobrien for (;;) 47060484Sobrien { 471104834Sobrien index += hash2; 472104834Sobrien if (index >= size) 473104834Sobrien index -= size; 47477298Sobrien 475104834Sobrien slot = htab->entries + index; 476218822Sdim if (*slot == HTAB_EMPTY_ENTRY) 47760484Sobrien return slot; 478218822Sdim else if (*slot == HTAB_DELETED_ENTRY) 47960484Sobrien abort (); 48060484Sobrien } 48160484Sobrien} 48260484Sobrien 48360484Sobrien/* The following function changes size of memory allocated for the 48460484Sobrien entries and repeatedly inserts the table elements. The occupancy 48560484Sobrien of the table after the call will be about 50%. Naturally the hash 48660484Sobrien table must already exist. Remember also that the place of the 48777298Sobrien table entries is changed. If memory allocation failures are allowed, 48877298Sobrien this function will return zero, indicating that the table could not be 48977298Sobrien expanded. If all goes well, it will return a non-zero value. */ 49060484Sobrien 49177298Sobrienstatic int 492218822Sdimhtab_expand (htab_t htab) 49360484Sobrien{ 49477298Sobrien PTR *oentries; 49577298Sobrien PTR *olimit; 49677298Sobrien PTR *p; 497104834Sobrien PTR *nentries; 498218822Sdim size_t nsize, osize, elts; 499218822Sdim unsigned int oindex, nindex; 50060484Sobrien 50160484Sobrien oentries = htab->entries; 502218822Sdim oindex = htab->size_prime_index; 503218822Sdim osize = htab->size; 504218822Sdim olimit = oentries + osize; 505218822Sdim elts = htab_elements (htab); 50660484Sobrien 507130561Sobrien /* Resize only when table after removal of unused elements is either 508130561Sobrien too full or too empty. */ 509218822Sdim if (elts * 2 > osize || (elts * 8 < osize && osize > 32)) 510218822Sdim { 511218822Sdim nindex = higher_prime_index (elts * 2); 512218822Sdim nsize = prime_tab[nindex].prime; 513218822Sdim } 514130561Sobrien else 515218822Sdim { 516218822Sdim nindex = oindex; 517218822Sdim nsize = osize; 518218822Sdim } 51960484Sobrien 520130561Sobrien if (htab->alloc_with_arg_f != NULL) 521130561Sobrien nentries = (PTR *) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize, 522130561Sobrien sizeof (PTR *)); 523130561Sobrien else 524130561Sobrien nentries = (PTR *) (*htab->alloc_f) (nsize, sizeof (PTR *)); 525104834Sobrien if (nentries == NULL) 526104834Sobrien return 0; 527104834Sobrien htab->entries = nentries; 528130561Sobrien htab->size = nsize; 529218822Sdim htab->size_prime_index = nindex; 53060484Sobrien htab->n_elements -= htab->n_deleted; 53160484Sobrien htab->n_deleted = 0; 53260484Sobrien 53360484Sobrien p = oentries; 53460484Sobrien do 53560484Sobrien { 53677298Sobrien PTR x = *p; 53777298Sobrien 538218822Sdim if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY) 53960484Sobrien { 54077298Sobrien PTR *q = find_empty_slot_for_expand (htab, (*htab->hash_f) (x)); 54177298Sobrien 54260484Sobrien *q = x; 54360484Sobrien } 54477298Sobrien 54560484Sobrien p++; 54660484Sobrien } 54760484Sobrien while (p < olimit); 54877298Sobrien 549104834Sobrien if (htab->free_f != NULL) 550104834Sobrien (*htab->free_f) (oentries); 551130561Sobrien else if (htab->free_with_arg_f != NULL) 552130561Sobrien (*htab->free_with_arg_f) (htab->alloc_arg, oentries); 55377298Sobrien return 1; 55460484Sobrien} 55560484Sobrien 55660484Sobrien/* This function searches for a hash table entry equal to the given 55760484Sobrien element. It cannot be used to insert or delete an element. */ 55860484Sobrien 55977298SobrienPTR 560218822Sdimhtab_find_with_hash (htab_t htab, const PTR element, hashval_t hash) 56160484Sobrien{ 562218822Sdim hashval_t index, hash2; 56360484Sobrien size_t size; 56477298Sobrien PTR entry; 56560484Sobrien 56660484Sobrien htab->searches++; 567218822Sdim size = htab_size (htab); 568218822Sdim index = htab_mod (hash, htab); 56960484Sobrien 57077298Sobrien entry = htab->entries[index]; 571218822Sdim if (entry == HTAB_EMPTY_ENTRY 572218822Sdim || (entry != HTAB_DELETED_ENTRY && (*htab->eq_f) (entry, element))) 57377298Sobrien return entry; 57477298Sobrien 575218822Sdim hash2 = htab_mod_m2 (hash, htab); 57660484Sobrien for (;;) 57760484Sobrien { 57860484Sobrien htab->collisions++; 57960484Sobrien index += hash2; 58060484Sobrien if (index >= size) 58160484Sobrien index -= size; 58277298Sobrien 58377298Sobrien entry = htab->entries[index]; 584218822Sdim if (entry == HTAB_EMPTY_ENTRY 585218822Sdim || (entry != HTAB_DELETED_ENTRY && (*htab->eq_f) (entry, element))) 58677298Sobrien return entry; 58760484Sobrien } 58860484Sobrien} 58960484Sobrien 59060484Sobrien/* Like htab_find_slot_with_hash, but compute the hash value from the 59160484Sobrien element. */ 59277298Sobrien 59377298SobrienPTR 594218822Sdimhtab_find (htab_t htab, const PTR element) 59560484Sobrien{ 59660484Sobrien return htab_find_with_hash (htab, element, (*htab->hash_f) (element)); 59760484Sobrien} 59860484Sobrien 59960484Sobrien/* This function searches for a hash table slot containing an entry 60060484Sobrien equal to the given element. To delete an entry, call this with 601218822Sdim insert=NO_INSERT, then call htab_clear_slot on the slot returned 602218822Sdim (possibly after doing some checks). To insert an entry, call this 603218822Sdim with insert=INSERT, then write the value you want into the returned 604218822Sdim slot. When inserting an entry, NULL may be returned if memory 605218822Sdim allocation fails. */ 60660484Sobrien 60777298SobrienPTR * 608218822Sdimhtab_find_slot_with_hash (htab_t htab, const PTR element, 609218822Sdim hashval_t hash, enum insert_option insert) 61060484Sobrien{ 61177298Sobrien PTR *first_deleted_slot; 612218822Sdim hashval_t index, hash2; 61360484Sobrien size_t size; 614104834Sobrien PTR entry; 61560484Sobrien 616218822Sdim size = htab_size (htab); 617218822Sdim if (insert == INSERT && size * 3 <= htab->n_elements * 4) 618218822Sdim { 619218822Sdim if (htab_expand (htab) == 0) 620218822Sdim return NULL; 621218822Sdim size = htab_size (htab); 622218822Sdim } 62360484Sobrien 624218822Sdim index = htab_mod (hash, htab); 62560484Sobrien 62660484Sobrien htab->searches++; 62760484Sobrien first_deleted_slot = NULL; 62860484Sobrien 629104834Sobrien entry = htab->entries[index]; 630218822Sdim if (entry == HTAB_EMPTY_ENTRY) 631104834Sobrien goto empty_entry; 632218822Sdim else if (entry == HTAB_DELETED_ENTRY) 633104834Sobrien first_deleted_slot = &htab->entries[index]; 634104834Sobrien else if ((*htab->eq_f) (entry, element)) 635104834Sobrien return &htab->entries[index]; 636104834Sobrien 637218822Sdim hash2 = htab_mod_m2 (hash, htab); 63860484Sobrien for (;;) 63960484Sobrien { 640104834Sobrien htab->collisions++; 641104834Sobrien index += hash2; 642104834Sobrien if (index >= size) 643104834Sobrien index -= size; 644104834Sobrien 645104834Sobrien entry = htab->entries[index]; 646218822Sdim if (entry == HTAB_EMPTY_ENTRY) 647104834Sobrien goto empty_entry; 648218822Sdim else if (entry == HTAB_DELETED_ENTRY) 64960484Sobrien { 65060484Sobrien if (!first_deleted_slot) 65160484Sobrien first_deleted_slot = &htab->entries[index]; 65260484Sobrien } 653104834Sobrien else if ((*htab->eq_f) (entry, element)) 65477298Sobrien return &htab->entries[index]; 65560484Sobrien } 656104834Sobrien 657104834Sobrien empty_entry: 658104834Sobrien if (insert == NO_INSERT) 659104834Sobrien return NULL; 660104834Sobrien 661104834Sobrien if (first_deleted_slot) 662104834Sobrien { 663130561Sobrien htab->n_deleted--; 664218822Sdim *first_deleted_slot = HTAB_EMPTY_ENTRY; 665104834Sobrien return first_deleted_slot; 666104834Sobrien } 667104834Sobrien 668130561Sobrien htab->n_elements++; 669104834Sobrien return &htab->entries[index]; 67060484Sobrien} 67160484Sobrien 67260484Sobrien/* Like htab_find_slot_with_hash, but compute the hash value from the 67360484Sobrien element. */ 67477298Sobrien 67577298SobrienPTR * 676218822Sdimhtab_find_slot (htab_t htab, const PTR element, enum insert_option insert) 67760484Sobrien{ 67860484Sobrien return htab_find_slot_with_hash (htab, element, (*htab->hash_f) (element), 67960484Sobrien insert); 68060484Sobrien} 68160484Sobrien 68260484Sobrien/* This function deletes an element with the given value from hash 683218822Sdim table (the hash is computed from the element). If there is no matching 684218822Sdim element in the hash table, this function does nothing. */ 685218822Sdim 686218822Sdimvoid 687218822Sdimhtab_remove_elt (htab_t htab, PTR element) 688218822Sdim{ 689218822Sdim htab_remove_elt_with_hash (htab, element, (*htab->hash_f) (element)); 690218822Sdim} 691218822Sdim 692218822Sdim 693218822Sdim/* This function deletes an element with the given value from hash 69460484Sobrien table. If there is no matching element in the hash table, this 69560484Sobrien function does nothing. */ 69660484Sobrien 69760484Sobrienvoid 698218822Sdimhtab_remove_elt_with_hash (htab_t htab, PTR element, hashval_t hash) 69960484Sobrien{ 70077298Sobrien PTR *slot; 70160484Sobrien 702218822Sdim slot = htab_find_slot_with_hash (htab, element, hash, NO_INSERT); 703218822Sdim if (*slot == HTAB_EMPTY_ENTRY) 70460484Sobrien return; 70560484Sobrien 70660484Sobrien if (htab->del_f) 70760484Sobrien (*htab->del_f) (*slot); 70860484Sobrien 709218822Sdim *slot = HTAB_DELETED_ENTRY; 71060484Sobrien htab->n_deleted++; 71160484Sobrien} 71260484Sobrien 71360484Sobrien/* This function clears a specified slot in a hash table. It is 71460484Sobrien useful when you've already done the lookup and don't want to do it 71560484Sobrien again. */ 71660484Sobrien 71760484Sobrienvoid 718218822Sdimhtab_clear_slot (htab_t htab, PTR *slot) 71960484Sobrien{ 720218822Sdim if (slot < htab->entries || slot >= htab->entries + htab_size (htab) 721218822Sdim || *slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY) 72260484Sobrien abort (); 72377298Sobrien 72460484Sobrien if (htab->del_f) 72560484Sobrien (*htab->del_f) (*slot); 72677298Sobrien 727218822Sdim *slot = HTAB_DELETED_ENTRY; 72860484Sobrien htab->n_deleted++; 72960484Sobrien} 73060484Sobrien 73160484Sobrien/* This function scans over the entire hash table calling 73260484Sobrien CALLBACK for each live entry. If CALLBACK returns false, 73360484Sobrien the iteration stops. INFO is passed as CALLBACK's second 73460484Sobrien argument. */ 73560484Sobrien 73660484Sobrienvoid 737218822Sdimhtab_traverse_noresize (htab_t htab, htab_trav callback, PTR info) 73860484Sobrien{ 739130561Sobrien PTR *slot; 740130561Sobrien PTR *limit; 741218822Sdim 742130561Sobrien slot = htab->entries; 743218822Sdim limit = slot + htab_size (htab); 744130561Sobrien 74560484Sobrien do 74660484Sobrien { 74777298Sobrien PTR x = *slot; 74877298Sobrien 749218822Sdim if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY) 75060484Sobrien if (!(*callback) (slot, info)) 75160484Sobrien break; 75260484Sobrien } 75360484Sobrien while (++slot < limit); 75460484Sobrien} 75560484Sobrien 756130561Sobrien/* Like htab_traverse_noresize, but does resize the table when it is 757130561Sobrien too empty to improve effectivity of subsequent calls. */ 758130561Sobrien 759130561Sobrienvoid 760218822Sdimhtab_traverse (htab_t htab, htab_trav callback, PTR info) 761130561Sobrien{ 762218822Sdim if (htab_elements (htab) * 8 < htab_size (htab)) 763130561Sobrien htab_expand (htab); 764130561Sobrien 765130561Sobrien htab_traverse_noresize (htab, callback, info); 766130561Sobrien} 767130561Sobrien 76877298Sobrien/* Return the fraction of fixed collisions during all work with given 76977298Sobrien hash table. */ 77060484Sobrien 77160484Sobriendouble 772218822Sdimhtab_collisions (htab_t htab) 77360484Sobrien{ 77477298Sobrien if (htab->searches == 0) 77577298Sobrien return 0.0; 77660484Sobrien 77777298Sobrien return (double) htab->collisions / (double) htab->searches; 77860484Sobrien} 77989857Sobrien 78089857Sobrien/* Hash P as a null-terminated string. 78189857Sobrien 78289857Sobrien Copied from gcc/hashtable.c. Zack had the following to say with respect 78389857Sobrien to applicability, though note that unlike hashtable.c, this hash table 78489857Sobrien implementation re-hashes rather than chain buckets. 78589857Sobrien 78689857Sobrien http://gcc.gnu.org/ml/gcc-patches/2001-08/msg01021.html 78789857Sobrien From: Zack Weinberg <zackw@panix.com> 78889857Sobrien Date: Fri, 17 Aug 2001 02:15:56 -0400 78989857Sobrien 79089857Sobrien I got it by extracting all the identifiers from all the source code 79189857Sobrien I had lying around in mid-1999, and testing many recurrences of 79289857Sobrien the form "H_n = H_{n-1} * K + c_n * L + M" where K, L, M were either 79389857Sobrien prime numbers or the appropriate identity. This was the best one. 79489857Sobrien I don't remember exactly what constituted "best", except I was 79589857Sobrien looking at bucket-length distributions mostly. 79689857Sobrien 79789857Sobrien So it should be very good at hashing identifiers, but might not be 79889857Sobrien as good at arbitrary strings. 79989857Sobrien 80089857Sobrien I'll add that it thoroughly trounces the hash functions recommended 80189857Sobrien for this use at http://burtleburtle.net/bob/hash/index.html, both 80289857Sobrien on speed and bucket distribution. I haven't tried it against the 80389857Sobrien function they just started using for Perl's hashes. */ 80489857Sobrien 80589857Sobrienhashval_t 806218822Sdimhtab_hash_string (const PTR p) 80789857Sobrien{ 80889857Sobrien const unsigned char *str = (const unsigned char *) p; 80989857Sobrien hashval_t r = 0; 81089857Sobrien unsigned char c; 81189857Sobrien 81289857Sobrien while ((c = *str++) != 0) 81389857Sobrien r = r * 67 + c - 113; 81489857Sobrien 81589857Sobrien return r; 81689857Sobrien} 817130561Sobrien 818130561Sobrien/* DERIVED FROM: 819130561Sobrien-------------------------------------------------------------------- 820130561Sobrienlookup2.c, by Bob Jenkins, December 1996, Public Domain. 821130561Sobrienhash(), hash2(), hash3, and mix() are externally useful functions. 822130561SobrienRoutines to test the hash are included if SELF_TEST is defined. 823130561SobrienYou can use this free for any purpose. It has no warranty. 824130561Sobrien-------------------------------------------------------------------- 825130561Sobrien*/ 826130561Sobrien 827130561Sobrien/* 828130561Sobrien-------------------------------------------------------------------- 829130561Sobrienmix -- mix 3 32-bit values reversibly. 830130561SobrienFor every delta with one or two bit set, and the deltas of all three 831130561Sobrien high bits or all three low bits, whether the original value of a,b,c 832130561Sobrien is almost all zero or is uniformly distributed, 833130561Sobrien* If mix() is run forward or backward, at least 32 bits in a,b,c 834130561Sobrien have at least 1/4 probability of changing. 835130561Sobrien* If mix() is run forward, every bit of c will change between 1/3 and 836130561Sobrien 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.) 837130561Sobrienmix() was built out of 36 single-cycle latency instructions in a 838130561Sobrien structure that could supported 2x parallelism, like so: 839130561Sobrien a -= b; 840130561Sobrien a -= c; x = (c>>13); 841130561Sobrien b -= c; a ^= x; 842130561Sobrien b -= a; x = (a<<8); 843130561Sobrien c -= a; b ^= x; 844130561Sobrien c -= b; x = (b>>13); 845130561Sobrien ... 846130561Sobrien Unfortunately, superscalar Pentiums and Sparcs can't take advantage 847130561Sobrien of that parallelism. They've also turned some of those single-cycle 848130561Sobrien latency instructions into multi-cycle latency instructions. Still, 849130561Sobrien this is the fastest good hash I could find. There were about 2^^68 850130561Sobrien to choose from. I only looked at a billion or so. 851130561Sobrien-------------------------------------------------------------------- 852130561Sobrien*/ 853130561Sobrien/* same, but slower, works on systems that might have 8 byte hashval_t's */ 854130561Sobrien#define mix(a,b,c) \ 855130561Sobrien{ \ 856130561Sobrien a -= b; a -= c; a ^= (c>>13); \ 857130561Sobrien b -= c; b -= a; b ^= (a<< 8); \ 858130561Sobrien c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \ 859130561Sobrien a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \ 860130561Sobrien b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \ 861130561Sobrien c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \ 862130561Sobrien a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \ 863130561Sobrien b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \ 864130561Sobrien c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \ 865130561Sobrien} 866130561Sobrien 867130561Sobrien/* 868130561Sobrien-------------------------------------------------------------------- 869130561Sobrienhash() -- hash a variable-length key into a 32-bit value 870130561Sobrien k : the key (the unaligned variable-length array of bytes) 871130561Sobrien len : the length of the key, counting by bytes 872130561Sobrien level : can be any 4-byte value 873130561SobrienReturns a 32-bit value. Every bit of the key affects every bit of 874130561Sobrienthe return value. Every 1-bit and 2-bit delta achieves avalanche. 875130561SobrienAbout 36+6len instructions. 876130561Sobrien 877130561SobrienThe best hash table sizes are powers of 2. There is no need to do 878130561Sobrienmod a prime (mod is sooo slow!). If you need less than 32 bits, 879130561Sobrienuse a bitmask. For example, if you need only 10 bits, do 880130561Sobrien h = (h & hashmask(10)); 881130561SobrienIn which case, the hash table should have hashsize(10) elements. 882130561Sobrien 883130561SobrienIf you are hashing n strings (ub1 **)k, do it like this: 884130561Sobrien for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h); 885130561Sobrien 886130561SobrienBy Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this 887130561Sobriencode any way you wish, private, educational, or commercial. It's free. 888130561Sobrien 889130561SobrienSee http://burtleburtle.net/bob/hash/evahash.html 890130561SobrienUse for hash table lookup, or anything where one collision in 2^32 is 891130561Sobrienacceptable. Do NOT use for cryptographic purposes. 892130561Sobrien-------------------------------------------------------------------- 893130561Sobrien*/ 894130561Sobrien 895218822Sdimhashval_t 896218822Sdimiterative_hash (const PTR k_in /* the key */, 897218822Sdim register size_t length /* the length of the key */, 898218822Sdim register hashval_t initval /* the previous hash, or 899218822Sdim an arbitrary value */) 900130561Sobrien{ 901130561Sobrien register const unsigned char *k = (const unsigned char *)k_in; 902130561Sobrien register hashval_t a,b,c,len; 903130561Sobrien 904130561Sobrien /* Set up the internal state */ 905130561Sobrien len = length; 906130561Sobrien a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ 907130561Sobrien c = initval; /* the previous hash value */ 908130561Sobrien 909130561Sobrien /*---------------------------------------- handle most of the key */ 910130561Sobrien#ifndef WORDS_BIGENDIAN 911130561Sobrien /* On a little-endian machine, if the data is 4-byte aligned we can hash 912130561Sobrien by word for better speed. This gives nondeterministic results on 913130561Sobrien big-endian machines. */ 914130561Sobrien if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0) 915130561Sobrien while (len >= 12) /* aligned */ 916130561Sobrien { 917130561Sobrien a += *(hashval_t *)(k+0); 918130561Sobrien b += *(hashval_t *)(k+4); 919130561Sobrien c += *(hashval_t *)(k+8); 920130561Sobrien mix(a,b,c); 921130561Sobrien k += 12; len -= 12; 922130561Sobrien } 923130561Sobrien else /* unaligned */ 924130561Sobrien#endif 925130561Sobrien while (len >= 12) 926130561Sobrien { 927130561Sobrien a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24)); 928130561Sobrien b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24)); 929130561Sobrien c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24)); 930130561Sobrien mix(a,b,c); 931130561Sobrien k += 12; len -= 12; 932130561Sobrien } 933130561Sobrien 934130561Sobrien /*------------------------------------- handle the last 11 bytes */ 935130561Sobrien c += length; 936130561Sobrien switch(len) /* all the case statements fall through */ 937130561Sobrien { 938130561Sobrien case 11: c+=((hashval_t)k[10]<<24); 939130561Sobrien case 10: c+=((hashval_t)k[9]<<16); 940130561Sobrien case 9 : c+=((hashval_t)k[8]<<8); 941130561Sobrien /* the first byte of c is reserved for the length */ 942130561Sobrien case 8 : b+=((hashval_t)k[7]<<24); 943130561Sobrien case 7 : b+=((hashval_t)k[6]<<16); 944130561Sobrien case 6 : b+=((hashval_t)k[5]<<8); 945130561Sobrien case 5 : b+=k[4]; 946130561Sobrien case 4 : a+=((hashval_t)k[3]<<24); 947130561Sobrien case 3 : a+=((hashval_t)k[2]<<16); 948130561Sobrien case 2 : a+=((hashval_t)k[1]<<8); 949130561Sobrien case 1 : a+=k[0]; 950130561Sobrien /* case 0: nothing left to add */ 951130561Sobrien } 952130561Sobrien mix(a,b,c); 953130561Sobrien /*-------------------------------------------- report the result */ 954130561Sobrien return c; 955130561Sobrien} 956