1169695Skan/* An expandable hash tables datatype. 2169695Skan Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004 3169695Skan Free Software Foundation, Inc. 4169695Skan Contributed by Vladimir Makarov (vmakarov@cygnus.com). 5169695Skan 6169695SkanThis file is part of the libiberty library. 7169695SkanLibiberty is free software; you can redistribute it and/or 8169695Skanmodify it under the terms of the GNU Library General Public 9169695SkanLicense as published by the Free Software Foundation; either 10169695Skanversion 2 of the License, or (at your option) any later version. 11169695Skan 12169695SkanLibiberty is distributed in the hope that it will be useful, 13169695Skanbut WITHOUT ANY WARRANTY; without even the implied warranty of 14169695SkanMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15169695SkanLibrary General Public License for more details. 16169695Skan 17169695SkanYou should have received a copy of the GNU Library General Public 18169695SkanLicense along with libiberty; see the file COPYING.LIB. If 19169695Skannot, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor, 20169695SkanBoston, MA 02110-1301, USA. */ 21169695Skan 22169695Skan/* This package implements basic hash table functionality. It is possible 23169695Skan to search for an entry, create an entry and destroy an entry. 24169695Skan 25169695Skan Elements in the table are generic pointers. 26169695Skan 27169695Skan The size of the table is not fixed; if the occupancy of the table 28169695Skan grows too high the hash table will be expanded. 29169695Skan 30169695Skan The abstract data implementation is based on generalized Algorithm D 31169695Skan from Knuth's book "The art of computer programming". Hash table is 32169695Skan expanded by creation of new hash table and transferring elements from 33169695Skan the old table to the new table. */ 34169695Skan 35169695Skan#ifdef HAVE_CONFIG_H 36169695Skan#include "config.h" 37169695Skan#endif 38169695Skan 39169695Skan#include <sys/types.h> 40169695Skan 41169695Skan#ifdef HAVE_STDLIB_H 42169695Skan#include <stdlib.h> 43169695Skan#endif 44169695Skan#ifdef HAVE_STRING_H 45169695Skan#include <string.h> 46169695Skan#endif 47169695Skan#ifdef HAVE_MALLOC_H 48169695Skan#include <malloc.h> 49169695Skan#endif 50169695Skan#ifdef HAVE_LIMITS_H 51169695Skan#include <limits.h> 52169695Skan#endif 53169695Skan#ifdef HAVE_STDINT_H 54169695Skan#include <stdint.h> 55169695Skan#endif 56169695Skan 57169695Skan#include <stdio.h> 58169695Skan 59169695Skan#include "libiberty.h" 60169695Skan#include "ansidecl.h" 61169695Skan#include "hashtab.h" 62169695Skan 63169695Skan#ifndef CHAR_BIT 64169695Skan#define CHAR_BIT 8 65169695Skan#endif 66169695Skan 67169695Skanstatic unsigned int higher_prime_index (unsigned long); 68169695Skanstatic hashval_t htab_mod_1 (hashval_t, hashval_t, hashval_t, int); 69169695Skanstatic hashval_t htab_mod (hashval_t, htab_t); 70169695Skanstatic hashval_t htab_mod_m2 (hashval_t, htab_t); 71169695Skanstatic hashval_t hash_pointer (const void *); 72169695Skanstatic int eq_pointer (const void *, const void *); 73169695Skanstatic int htab_expand (htab_t); 74169695Skanstatic PTR *find_empty_slot_for_expand (htab_t, hashval_t); 75169695Skan 76169695Skan/* At some point, we could make these be NULL, and modify the 77169695Skan hash-table routines to handle NULL specially; that would avoid 78169695Skan function-call overhead for the common case of hashing pointers. */ 79169695Skanhtab_hash htab_hash_pointer = hash_pointer; 80169695Skanhtab_eq htab_eq_pointer = eq_pointer; 81169695Skan 82169695Skan/* Table of primes and multiplicative inverses. 83169695Skan 84169695Skan Note that these are not minimally reduced inverses. Unlike when generating 85169695Skan code to divide by a constant, we want to be able to use the same algorithm 86169695Skan all the time. All of these inverses (are implied to) have bit 32 set. 87169695Skan 88169695Skan For the record, here's the function that computed the table; it's a 89169695Skan vastly simplified version of the function of the same name from gcc. */ 90169695Skan 91169695Skan#if 0 92169695Skanunsigned int 93169695Skanceil_log2 (unsigned int x) 94169695Skan{ 95169695Skan int i; 96169695Skan for (i = 31; i >= 0 ; --i) 97169695Skan if (x > (1u << i)) 98169695Skan return i+1; 99169695Skan abort (); 100169695Skan} 101169695Skan 102169695Skanunsigned int 103169695Skanchoose_multiplier (unsigned int d, unsigned int *mlp, unsigned char *shiftp) 104169695Skan{ 105169695Skan unsigned long long mhigh; 106169695Skan double nx; 107169695Skan int lgup, post_shift; 108169695Skan int pow, pow2; 109169695Skan int n = 32, precision = 32; 110169695Skan 111169695Skan lgup = ceil_log2 (d); 112169695Skan pow = n + lgup; 113169695Skan pow2 = n + lgup - precision; 114169695Skan 115169695Skan nx = ldexp (1.0, pow) + ldexp (1.0, pow2); 116169695Skan mhigh = nx / d; 117169695Skan 118169695Skan *shiftp = lgup - 1; 119169695Skan *mlp = mhigh; 120169695Skan return mhigh >> 32; 121169695Skan} 122169695Skan#endif 123169695Skan 124169695Skanstruct prime_ent 125169695Skan{ 126169695Skan hashval_t prime; 127169695Skan hashval_t inv; 128169695Skan hashval_t inv_m2; /* inverse of prime-2 */ 129169695Skan hashval_t shift; 130169695Skan}; 131169695Skan 132169695Skanstatic struct prime_ent const prime_tab[] = { 133169695Skan { 7, 0x24924925, 0x9999999b, 2 }, 134169695Skan { 13, 0x3b13b13c, 0x745d1747, 3 }, 135169695Skan { 31, 0x08421085, 0x1a7b9612, 4 }, 136169695Skan { 61, 0x0c9714fc, 0x15b1e5f8, 5 }, 137169695Skan { 127, 0x02040811, 0x0624dd30, 6 }, 138169695Skan { 251, 0x05197f7e, 0x073260a5, 7 }, 139169695Skan { 509, 0x01824366, 0x02864fc8, 8 }, 140169695Skan { 1021, 0x00c0906d, 0x014191f7, 9 }, 141169695Skan { 2039, 0x0121456f, 0x0161e69e, 10 }, 142169695Skan { 4093, 0x00300902, 0x00501908, 11 }, 143169695Skan { 8191, 0x00080041, 0x00180241, 12 }, 144169695Skan { 16381, 0x000c0091, 0x00140191, 13 }, 145169695Skan { 32749, 0x002605a5, 0x002a06e6, 14 }, 146169695Skan { 65521, 0x000f00e2, 0x00110122, 15 }, 147169695Skan { 131071, 0x00008001, 0x00018003, 16 }, 148169695Skan { 262139, 0x00014002, 0x0001c004, 17 }, 149169695Skan { 524287, 0x00002001, 0x00006001, 18 }, 150169695Skan { 1048573, 0x00003001, 0x00005001, 19 }, 151169695Skan { 2097143, 0x00004801, 0x00005801, 20 }, 152169695Skan { 4194301, 0x00000c01, 0x00001401, 21 }, 153169695Skan { 8388593, 0x00001e01, 0x00002201, 22 }, 154169695Skan { 16777213, 0x00000301, 0x00000501, 23 }, 155169695Skan { 33554393, 0x00001381, 0x00001481, 24 }, 156169695Skan { 67108859, 0x00000141, 0x000001c1, 25 }, 157169695Skan { 134217689, 0x000004e1, 0x00000521, 26 }, 158169695Skan { 268435399, 0x00000391, 0x000003b1, 27 }, 159169695Skan { 536870909, 0x00000019, 0x00000029, 28 }, 160169695Skan { 1073741789, 0x0000008d, 0x00000095, 29 }, 161169695Skan { 2147483647, 0x00000003, 0x00000007, 30 }, 162169695Skan /* Avoid "decimal constant so large it is unsigned" for 4294967291. */ 163169695Skan { 0xfffffffb, 0x00000006, 0x00000008, 31 } 164169695Skan}; 165169695Skan 166169695Skan/* The following function returns an index into the above table of the 167169695Skan nearest prime number which is greater than N, and near a power of two. */ 168169695Skan 169169695Skanstatic unsigned int 170169695Skanhigher_prime_index (unsigned long n) 171169695Skan{ 172169695Skan unsigned int low = 0; 173169695Skan unsigned int high = sizeof(prime_tab) / sizeof(prime_tab[0]); 174169695Skan 175169695Skan while (low != high) 176169695Skan { 177169695Skan unsigned int mid = low + (high - low) / 2; 178169695Skan if (n > prime_tab[mid].prime) 179169695Skan low = mid + 1; 180169695Skan else 181169695Skan high = mid; 182169695Skan } 183169695Skan 184169695Skan /* If we've run out of primes, abort. */ 185169695Skan if (n > prime_tab[low].prime) 186169695Skan { 187169695Skan fprintf (stderr, "Cannot find prime bigger than %lu\n", n); 188169695Skan abort (); 189169695Skan } 190169695Skan 191169695Skan return low; 192169695Skan} 193169695Skan 194169695Skan/* Returns a hash code for P. */ 195169695Skan 196169695Skanstatic hashval_t 197169695Skanhash_pointer (const PTR p) 198169695Skan{ 199169695Skan return (hashval_t) ((long)p >> 3); 200169695Skan} 201169695Skan 202169695Skan/* Returns non-zero if P1 and P2 are equal. */ 203169695Skan 204169695Skanstatic int 205169695Skaneq_pointer (const PTR p1, const PTR p2) 206169695Skan{ 207169695Skan return p1 == p2; 208169695Skan} 209169695Skan 210169695Skan 211169695Skan/* The parens around the function names in the next two definitions 212169695Skan are essential in order to prevent macro expansions of the name. 213169695Skan The bodies, however, are expanded as expected, so they are not 214169695Skan recursive definitions. */ 215169695Skan 216169695Skan/* Return the current size of given hash table. */ 217169695Skan 218169695Skan#define htab_size(htab) ((htab)->size) 219169695Skan 220169695Skansize_t 221169695Skan(htab_size) (htab_t htab) 222169695Skan{ 223169695Skan return htab_size (htab); 224169695Skan} 225169695Skan 226169695Skan/* Return the current number of elements in given hash table. */ 227169695Skan 228169695Skan#define htab_elements(htab) ((htab)->n_elements - (htab)->n_deleted) 229169695Skan 230169695Skansize_t 231169695Skan(htab_elements) (htab_t htab) 232169695Skan{ 233169695Skan return htab_elements (htab); 234169695Skan} 235169695Skan 236169695Skan/* Return X % Y. */ 237169695Skan 238169695Skanstatic inline hashval_t 239169695Skanhtab_mod_1 (hashval_t x, hashval_t y, hashval_t inv, int shift) 240169695Skan{ 241169695Skan /* The multiplicative inverses computed above are for 32-bit types, and 242169695Skan requires that we be able to compute a highpart multiply. */ 243169695Skan#ifdef UNSIGNED_64BIT_TYPE 244169695Skan __extension__ typedef UNSIGNED_64BIT_TYPE ull; 245169695Skan if (sizeof (hashval_t) * CHAR_BIT <= 32) 246169695Skan { 247169695Skan hashval_t t1, t2, t3, t4, q, r; 248169695Skan 249169695Skan t1 = ((ull)x * inv) >> 32; 250169695Skan t2 = x - t1; 251169695Skan t3 = t2 >> 1; 252169695Skan t4 = t1 + t3; 253169695Skan q = t4 >> shift; 254169695Skan r = x - (q * y); 255169695Skan 256169695Skan return r; 257169695Skan } 258169695Skan#endif 259169695Skan 260169695Skan /* Otherwise just use the native division routines. */ 261169695Skan return x % y; 262169695Skan} 263169695Skan 264169695Skan/* Compute the primary hash for HASH given HTAB's current size. */ 265169695Skan 266169695Skanstatic inline hashval_t 267169695Skanhtab_mod (hashval_t hash, htab_t htab) 268169695Skan{ 269169695Skan const struct prime_ent *p = &prime_tab[htab->size_prime_index]; 270169695Skan return htab_mod_1 (hash, p->prime, p->inv, p->shift); 271169695Skan} 272169695Skan 273169695Skan/* Compute the secondary hash for HASH given HTAB's current size. */ 274169695Skan 275169695Skanstatic inline hashval_t 276169695Skanhtab_mod_m2 (hashval_t hash, htab_t htab) 277169695Skan{ 278169695Skan const struct prime_ent *p = &prime_tab[htab->size_prime_index]; 279169695Skan return 1 + htab_mod_1 (hash, p->prime - 2, p->inv_m2, p->shift); 280169695Skan} 281169695Skan 282169695Skan/* This function creates table with length slightly longer than given 283169695Skan source length. Created hash table is initiated as empty (all the 284169695Skan hash table entries are HTAB_EMPTY_ENTRY). The function returns the 285169695Skan created hash table, or NULL if memory allocation fails. */ 286169695Skan 287169695Skanhtab_t 288169695Skanhtab_create_alloc (size_t size, htab_hash hash_f, htab_eq eq_f, 289169695Skan htab_del del_f, htab_alloc alloc_f, htab_free free_f) 290169695Skan{ 291169695Skan htab_t result; 292169695Skan unsigned int size_prime_index; 293169695Skan 294169695Skan size_prime_index = higher_prime_index (size); 295169695Skan size = prime_tab[size_prime_index].prime; 296169695Skan 297169695Skan result = (htab_t) (*alloc_f) (1, sizeof (struct htab)); 298169695Skan if (result == NULL) 299169695Skan return NULL; 300169695Skan result->entries = (PTR *) (*alloc_f) (size, sizeof (PTR)); 301169695Skan if (result->entries == NULL) 302169695Skan { 303169695Skan if (free_f != NULL) 304169695Skan (*free_f) (result); 305169695Skan return NULL; 306169695Skan } 307169695Skan result->size = size; 308169695Skan result->size_prime_index = size_prime_index; 309169695Skan result->hash_f = hash_f; 310169695Skan result->eq_f = eq_f; 311169695Skan result->del_f = del_f; 312169695Skan result->alloc_f = alloc_f; 313169695Skan result->free_f = free_f; 314169695Skan return result; 315169695Skan} 316169695Skan 317169695Skan/* As above, but use the variants of alloc_f and free_f which accept 318169695Skan an extra argument. */ 319169695Skan 320169695Skanhtab_t 321169695Skanhtab_create_alloc_ex (size_t size, htab_hash hash_f, htab_eq eq_f, 322169695Skan htab_del del_f, void *alloc_arg, 323169695Skan htab_alloc_with_arg alloc_f, 324169695Skan htab_free_with_arg free_f) 325169695Skan{ 326169695Skan htab_t result; 327169695Skan unsigned int size_prime_index; 328169695Skan 329169695Skan size_prime_index = higher_prime_index (size); 330169695Skan size = prime_tab[size_prime_index].prime; 331169695Skan 332169695Skan result = (htab_t) (*alloc_f) (alloc_arg, 1, sizeof (struct htab)); 333169695Skan if (result == NULL) 334169695Skan return NULL; 335169695Skan result->entries = (PTR *) (*alloc_f) (alloc_arg, size, sizeof (PTR)); 336169695Skan if (result->entries == NULL) 337169695Skan { 338169695Skan if (free_f != NULL) 339169695Skan (*free_f) (alloc_arg, result); 340169695Skan return NULL; 341169695Skan } 342169695Skan result->size = size; 343169695Skan result->size_prime_index = size_prime_index; 344169695Skan result->hash_f = hash_f; 345169695Skan result->eq_f = eq_f; 346169695Skan result->del_f = del_f; 347169695Skan result->alloc_arg = alloc_arg; 348169695Skan result->alloc_with_arg_f = alloc_f; 349169695Skan result->free_with_arg_f = free_f; 350169695Skan return result; 351169695Skan} 352169695Skan 353169695Skan/* Update the function pointers and allocation parameter in the htab_t. */ 354169695Skan 355169695Skanvoid 356169695Skanhtab_set_functions_ex (htab_t htab, htab_hash hash_f, htab_eq eq_f, 357169695Skan htab_del del_f, PTR alloc_arg, 358169695Skan htab_alloc_with_arg alloc_f, htab_free_with_arg free_f) 359169695Skan{ 360169695Skan htab->hash_f = hash_f; 361169695Skan htab->eq_f = eq_f; 362169695Skan htab->del_f = del_f; 363169695Skan htab->alloc_arg = alloc_arg; 364169695Skan htab->alloc_with_arg_f = alloc_f; 365169695Skan htab->free_with_arg_f = free_f; 366169695Skan} 367169695Skan 368169695Skan/* These functions exist solely for backward compatibility. */ 369169695Skan 370169695Skan#undef htab_create 371169695Skanhtab_t 372169695Skanhtab_create (size_t size, htab_hash hash_f, htab_eq eq_f, htab_del del_f) 373169695Skan{ 374169695Skan return htab_create_alloc (size, hash_f, eq_f, del_f, xcalloc, free); 375169695Skan} 376169695Skan 377169695Skanhtab_t 378169695Skanhtab_try_create (size_t size, htab_hash hash_f, htab_eq eq_f, htab_del del_f) 379169695Skan{ 380169695Skan return htab_create_alloc (size, hash_f, eq_f, del_f, calloc, free); 381169695Skan} 382169695Skan 383169695Skan/* This function frees all memory allocated for given hash table. 384169695Skan Naturally the hash table must already exist. */ 385169695Skan 386169695Skanvoid 387169695Skanhtab_delete (htab_t htab) 388169695Skan{ 389169695Skan size_t size = htab_size (htab); 390169695Skan PTR *entries = htab->entries; 391169695Skan int i; 392169695Skan 393169695Skan if (htab->del_f) 394169695Skan for (i = size - 1; i >= 0; i--) 395169695Skan if (entries[i] != HTAB_EMPTY_ENTRY && entries[i] != HTAB_DELETED_ENTRY) 396169695Skan (*htab->del_f) (entries[i]); 397169695Skan 398169695Skan if (htab->free_f != NULL) 399169695Skan { 400169695Skan (*htab->free_f) (entries); 401169695Skan (*htab->free_f) (htab); 402169695Skan } 403169695Skan else if (htab->free_with_arg_f != NULL) 404169695Skan { 405169695Skan (*htab->free_with_arg_f) (htab->alloc_arg, entries); 406169695Skan (*htab->free_with_arg_f) (htab->alloc_arg, htab); 407169695Skan } 408169695Skan} 409169695Skan 410169695Skan/* This function clears all entries in the given hash table. */ 411169695Skan 412169695Skanvoid 413169695Skanhtab_empty (htab_t htab) 414169695Skan{ 415169695Skan size_t size = htab_size (htab); 416169695Skan PTR *entries = htab->entries; 417169695Skan int i; 418169695Skan 419169695Skan if (htab->del_f) 420169695Skan for (i = size - 1; i >= 0; i--) 421169695Skan if (entries[i] != HTAB_EMPTY_ENTRY && entries[i] != HTAB_DELETED_ENTRY) 422169695Skan (*htab->del_f) (entries[i]); 423169695Skan 424169695Skan /* Instead of clearing megabyte, downsize the table. */ 425169695Skan if (size > 1024*1024 / sizeof (PTR)) 426169695Skan { 427169695Skan int nindex = higher_prime_index (1024 / sizeof (PTR)); 428169695Skan int nsize = prime_tab[nindex].prime; 429169695Skan 430169695Skan if (htab->free_f != NULL) 431169695Skan (*htab->free_f) (htab->entries); 432169695Skan else if (htab->free_with_arg_f != NULL) 433169695Skan (*htab->free_with_arg_f) (htab->alloc_arg, htab->entries); 434169695Skan if (htab->alloc_with_arg_f != NULL) 435169695Skan htab->entries = (PTR *) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize, 436169695Skan sizeof (PTR *)); 437169695Skan else 438169695Skan htab->entries = (PTR *) (*htab->alloc_f) (nsize, sizeof (PTR *)); 439169695Skan htab->size = nsize; 440169695Skan htab->size_prime_index = nindex; 441169695Skan } 442169695Skan else 443169695Skan memset (entries, 0, size * sizeof (PTR)); 444169695Skan htab->n_deleted = 0; 445169695Skan htab->n_elements = 0; 446169695Skan} 447169695Skan 448169695Skan/* Similar to htab_find_slot, but without several unwanted side effects: 449169695Skan - Does not call htab->eq_f when it finds an existing entry. 450169695Skan - Does not change the count of elements/searches/collisions in the 451169695Skan hash table. 452169695Skan This function also assumes there are no deleted entries in the table. 453169695Skan HASH is the hash value for the element to be inserted. */ 454169695Skan 455169695Skanstatic PTR * 456169695Skanfind_empty_slot_for_expand (htab_t htab, hashval_t hash) 457169695Skan{ 458169695Skan hashval_t index = htab_mod (hash, htab); 459169695Skan size_t size = htab_size (htab); 460169695Skan PTR *slot = htab->entries + index; 461169695Skan hashval_t hash2; 462169695Skan 463169695Skan if (*slot == HTAB_EMPTY_ENTRY) 464169695Skan return slot; 465169695Skan else if (*slot == HTAB_DELETED_ENTRY) 466169695Skan abort (); 467169695Skan 468169695Skan hash2 = htab_mod_m2 (hash, htab); 469169695Skan for (;;) 470169695Skan { 471169695Skan index += hash2; 472169695Skan if (index >= size) 473169695Skan index -= size; 474169695Skan 475169695Skan slot = htab->entries + index; 476169695Skan if (*slot == HTAB_EMPTY_ENTRY) 477169695Skan return slot; 478169695Skan else if (*slot == HTAB_DELETED_ENTRY) 479169695Skan abort (); 480169695Skan } 481169695Skan} 482169695Skan 483169695Skan/* The following function changes size of memory allocated for the 484169695Skan entries and repeatedly inserts the table elements. The occupancy 485169695Skan of the table after the call will be about 50%. Naturally the hash 486169695Skan table must already exist. Remember also that the place of the 487169695Skan table entries is changed. If memory allocation failures are allowed, 488169695Skan this function will return zero, indicating that the table could not be 489169695Skan expanded. If all goes well, it will return a non-zero value. */ 490169695Skan 491169695Skanstatic int 492169695Skanhtab_expand (htab_t htab) 493169695Skan{ 494169695Skan PTR *oentries; 495169695Skan PTR *olimit; 496169695Skan PTR *p; 497169695Skan PTR *nentries; 498169695Skan size_t nsize, osize, elts; 499169695Skan unsigned int oindex, nindex; 500169695Skan 501169695Skan oentries = htab->entries; 502169695Skan oindex = htab->size_prime_index; 503169695Skan osize = htab->size; 504169695Skan olimit = oentries + osize; 505169695Skan elts = htab_elements (htab); 506169695Skan 507169695Skan /* Resize only when table after removal of unused elements is either 508169695Skan too full or too empty. */ 509169695Skan if (elts * 2 > osize || (elts * 8 < osize && osize > 32)) 510169695Skan { 511169695Skan nindex = higher_prime_index (elts * 2); 512169695Skan nsize = prime_tab[nindex].prime; 513169695Skan } 514169695Skan else 515169695Skan { 516169695Skan nindex = oindex; 517169695Skan nsize = osize; 518169695Skan } 519169695Skan 520169695Skan if (htab->alloc_with_arg_f != NULL) 521169695Skan nentries = (PTR *) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize, 522169695Skan sizeof (PTR *)); 523169695Skan else 524169695Skan nentries = (PTR *) (*htab->alloc_f) (nsize, sizeof (PTR *)); 525169695Skan if (nentries == NULL) 526169695Skan return 0; 527169695Skan htab->entries = nentries; 528169695Skan htab->size = nsize; 529169695Skan htab->size_prime_index = nindex; 530169695Skan htab->n_elements -= htab->n_deleted; 531169695Skan htab->n_deleted = 0; 532169695Skan 533169695Skan p = oentries; 534169695Skan do 535169695Skan { 536169695Skan PTR x = *p; 537169695Skan 538169695Skan if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY) 539169695Skan { 540169695Skan PTR *q = find_empty_slot_for_expand (htab, (*htab->hash_f) (x)); 541169695Skan 542169695Skan *q = x; 543169695Skan } 544169695Skan 545169695Skan p++; 546169695Skan } 547169695Skan while (p < olimit); 548169695Skan 549169695Skan if (htab->free_f != NULL) 550169695Skan (*htab->free_f) (oentries); 551169695Skan else if (htab->free_with_arg_f != NULL) 552169695Skan (*htab->free_with_arg_f) (htab->alloc_arg, oentries); 553169695Skan return 1; 554169695Skan} 555169695Skan 556169695Skan/* This function searches for a hash table entry equal to the given 557169695Skan element. It cannot be used to insert or delete an element. */ 558169695Skan 559169695SkanPTR 560169695Skanhtab_find_with_hash (htab_t htab, const PTR element, hashval_t hash) 561169695Skan{ 562169695Skan hashval_t index, hash2; 563169695Skan size_t size; 564169695Skan PTR entry; 565169695Skan 566169695Skan htab->searches++; 567169695Skan size = htab_size (htab); 568169695Skan index = htab_mod (hash, htab); 569169695Skan 570169695Skan entry = htab->entries[index]; 571169695Skan if (entry == HTAB_EMPTY_ENTRY 572169695Skan || (entry != HTAB_DELETED_ENTRY && (*htab->eq_f) (entry, element))) 573169695Skan return entry; 574169695Skan 575169695Skan hash2 = htab_mod_m2 (hash, htab); 576169695Skan for (;;) 577169695Skan { 578169695Skan htab->collisions++; 579169695Skan index += hash2; 580169695Skan if (index >= size) 581169695Skan index -= size; 582169695Skan 583169695Skan entry = htab->entries[index]; 584169695Skan if (entry == HTAB_EMPTY_ENTRY 585169695Skan || (entry != HTAB_DELETED_ENTRY && (*htab->eq_f) (entry, element))) 586169695Skan return entry; 587169695Skan } 588169695Skan} 589169695Skan 590169695Skan/* Like htab_find_slot_with_hash, but compute the hash value from the 591169695Skan element. */ 592169695Skan 593169695SkanPTR 594169695Skanhtab_find (htab_t htab, const PTR element) 595169695Skan{ 596169695Skan return htab_find_with_hash (htab, element, (*htab->hash_f) (element)); 597169695Skan} 598169695Skan 599169695Skan/* This function searches for a hash table slot containing an entry 600169695Skan equal to the given element. To delete an entry, call this with 601169695Skan insert=NO_INSERT, then call htab_clear_slot on the slot returned 602169695Skan (possibly after doing some checks). To insert an entry, call this 603169695Skan with insert=INSERT, then write the value you want into the returned 604169695Skan slot. When inserting an entry, NULL may be returned if memory 605169695Skan allocation fails. */ 606169695Skan 607169695SkanPTR * 608169695Skanhtab_find_slot_with_hash (htab_t htab, const PTR element, 609169695Skan hashval_t hash, enum insert_option insert) 610169695Skan{ 611169695Skan PTR *first_deleted_slot; 612169695Skan hashval_t index, hash2; 613169695Skan size_t size; 614169695Skan PTR entry; 615169695Skan 616169695Skan size = htab_size (htab); 617169695Skan if (insert == INSERT && size * 3 <= htab->n_elements * 4) 618169695Skan { 619169695Skan if (htab_expand (htab) == 0) 620169695Skan return NULL; 621169695Skan size = htab_size (htab); 622169695Skan } 623169695Skan 624169695Skan index = htab_mod (hash, htab); 625169695Skan 626169695Skan htab->searches++; 627169695Skan first_deleted_slot = NULL; 628169695Skan 629169695Skan entry = htab->entries[index]; 630169695Skan if (entry == HTAB_EMPTY_ENTRY) 631169695Skan goto empty_entry; 632169695Skan else if (entry == HTAB_DELETED_ENTRY) 633169695Skan first_deleted_slot = &htab->entries[index]; 634169695Skan else if ((*htab->eq_f) (entry, element)) 635169695Skan return &htab->entries[index]; 636169695Skan 637169695Skan hash2 = htab_mod_m2 (hash, htab); 638169695Skan for (;;) 639169695Skan { 640169695Skan htab->collisions++; 641169695Skan index += hash2; 642169695Skan if (index >= size) 643169695Skan index -= size; 644169695Skan 645169695Skan entry = htab->entries[index]; 646169695Skan if (entry == HTAB_EMPTY_ENTRY) 647169695Skan goto empty_entry; 648169695Skan else if (entry == HTAB_DELETED_ENTRY) 649169695Skan { 650169695Skan if (!first_deleted_slot) 651169695Skan first_deleted_slot = &htab->entries[index]; 652169695Skan } 653169695Skan else if ((*htab->eq_f) (entry, element)) 654169695Skan return &htab->entries[index]; 655169695Skan } 656169695Skan 657169695Skan empty_entry: 658169695Skan if (insert == NO_INSERT) 659169695Skan return NULL; 660169695Skan 661169695Skan if (first_deleted_slot) 662169695Skan { 663169695Skan htab->n_deleted--; 664169695Skan *first_deleted_slot = HTAB_EMPTY_ENTRY; 665169695Skan return first_deleted_slot; 666169695Skan } 667169695Skan 668169695Skan htab->n_elements++; 669169695Skan return &htab->entries[index]; 670169695Skan} 671169695Skan 672169695Skan/* Like htab_find_slot_with_hash, but compute the hash value from the 673169695Skan element. */ 674169695Skan 675169695SkanPTR * 676169695Skanhtab_find_slot (htab_t htab, const PTR element, enum insert_option insert) 677169695Skan{ 678169695Skan return htab_find_slot_with_hash (htab, element, (*htab->hash_f) (element), 679169695Skan insert); 680169695Skan} 681169695Skan 682169695Skan/* This function deletes an element with the given value from hash 683169695Skan table (the hash is computed from the element). If there is no matching 684169695Skan element in the hash table, this function does nothing. */ 685169695Skan 686169695Skanvoid 687169695Skanhtab_remove_elt (htab_t htab, PTR element) 688169695Skan{ 689169695Skan htab_remove_elt_with_hash (htab, element, (*htab->hash_f) (element)); 690169695Skan} 691169695Skan 692169695Skan 693169695Skan/* This function deletes an element with the given value from hash 694169695Skan table. If there is no matching element in the hash table, this 695169695Skan function does nothing. */ 696169695Skan 697169695Skanvoid 698169695Skanhtab_remove_elt_with_hash (htab_t htab, PTR element, hashval_t hash) 699169695Skan{ 700169695Skan PTR *slot; 701169695Skan 702169695Skan slot = htab_find_slot_with_hash (htab, element, hash, NO_INSERT); 703169695Skan if (*slot == HTAB_EMPTY_ENTRY) 704169695Skan return; 705169695Skan 706169695Skan if (htab->del_f) 707169695Skan (*htab->del_f) (*slot); 708169695Skan 709169695Skan *slot = HTAB_DELETED_ENTRY; 710169695Skan htab->n_deleted++; 711169695Skan} 712169695Skan 713169695Skan/* This function clears a specified slot in a hash table. It is 714169695Skan useful when you've already done the lookup and don't want to do it 715169695Skan again. */ 716169695Skan 717169695Skanvoid 718169695Skanhtab_clear_slot (htab_t htab, PTR *slot) 719169695Skan{ 720169695Skan if (slot < htab->entries || slot >= htab->entries + htab_size (htab) 721169695Skan || *slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY) 722169695Skan abort (); 723169695Skan 724169695Skan if (htab->del_f) 725169695Skan (*htab->del_f) (*slot); 726169695Skan 727169695Skan *slot = HTAB_DELETED_ENTRY; 728169695Skan htab->n_deleted++; 729169695Skan} 730169695Skan 731169695Skan/* This function scans over the entire hash table calling 732169695Skan CALLBACK for each live entry. If CALLBACK returns false, 733169695Skan the iteration stops. INFO is passed as CALLBACK's second 734169695Skan argument. */ 735169695Skan 736169695Skanvoid 737169695Skanhtab_traverse_noresize (htab_t htab, htab_trav callback, PTR info) 738169695Skan{ 739169695Skan PTR *slot; 740169695Skan PTR *limit; 741169695Skan 742169695Skan slot = htab->entries; 743169695Skan limit = slot + htab_size (htab); 744169695Skan 745169695Skan do 746169695Skan { 747169695Skan PTR x = *slot; 748169695Skan 749169695Skan if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY) 750169695Skan if (!(*callback) (slot, info)) 751169695Skan break; 752169695Skan } 753169695Skan while (++slot < limit); 754169695Skan} 755169695Skan 756169695Skan/* Like htab_traverse_noresize, but does resize the table when it is 757169695Skan too empty to improve effectivity of subsequent calls. */ 758169695Skan 759169695Skanvoid 760169695Skanhtab_traverse (htab_t htab, htab_trav callback, PTR info) 761169695Skan{ 762169695Skan if (htab_elements (htab) * 8 < htab_size (htab)) 763169695Skan htab_expand (htab); 764169695Skan 765169695Skan htab_traverse_noresize (htab, callback, info); 766169695Skan} 767169695Skan 768169695Skan/* Return the fraction of fixed collisions during all work with given 769169695Skan hash table. */ 770169695Skan 771169695Skandouble 772169695Skanhtab_collisions (htab_t htab) 773169695Skan{ 774169695Skan if (htab->searches == 0) 775169695Skan return 0.0; 776169695Skan 777169695Skan return (double) htab->collisions / (double) htab->searches; 778169695Skan} 779169695Skan 780169695Skan/* Hash P as a null-terminated string. 781169695Skan 782169695Skan Copied from gcc/hashtable.c. Zack had the following to say with respect 783169695Skan to applicability, though note that unlike hashtable.c, this hash table 784169695Skan implementation re-hashes rather than chain buckets. 785169695Skan 786169695Skan http://gcc.gnu.org/ml/gcc-patches/2001-08/msg01021.html 787169695Skan From: Zack Weinberg <zackw@panix.com> 788169695Skan Date: Fri, 17 Aug 2001 02:15:56 -0400 789169695Skan 790169695Skan I got it by extracting all the identifiers from all the source code 791169695Skan I had lying around in mid-1999, and testing many recurrences of 792169695Skan the form "H_n = H_{n-1} * K + c_n * L + M" where K, L, M were either 793169695Skan prime numbers or the appropriate identity. This was the best one. 794169695Skan I don't remember exactly what constituted "best", except I was 795169695Skan looking at bucket-length distributions mostly. 796169695Skan 797169695Skan So it should be very good at hashing identifiers, but might not be 798169695Skan as good at arbitrary strings. 799169695Skan 800169695Skan I'll add that it thoroughly trounces the hash functions recommended 801169695Skan for this use at http://burtleburtle.net/bob/hash/index.html, both 802169695Skan on speed and bucket distribution. I haven't tried it against the 803169695Skan function they just started using for Perl's hashes. */ 804169695Skan 805169695Skanhashval_t 806169695Skanhtab_hash_string (const PTR p) 807169695Skan{ 808169695Skan const unsigned char *str = (const unsigned char *) p; 809169695Skan hashval_t r = 0; 810169695Skan unsigned char c; 811169695Skan 812169695Skan while ((c = *str++) != 0) 813169695Skan r = r * 67 + c - 113; 814169695Skan 815169695Skan return r; 816169695Skan} 817169695Skan 818169695Skan/* DERIVED FROM: 819169695Skan-------------------------------------------------------------------- 820169695Skanlookup2.c, by Bob Jenkins, December 1996, Public Domain. 821169695Skanhash(), hash2(), hash3, and mix() are externally useful functions. 822169695SkanRoutines to test the hash are included if SELF_TEST is defined. 823169695SkanYou can use this free for any purpose. It has no warranty. 824169695Skan-------------------------------------------------------------------- 825169695Skan*/ 826169695Skan 827169695Skan/* 828169695Skan-------------------------------------------------------------------- 829169695Skanmix -- mix 3 32-bit values reversibly. 830169695SkanFor every delta with one or two bit set, and the deltas of all three 831169695Skan high bits or all three low bits, whether the original value of a,b,c 832169695Skan is almost all zero or is uniformly distributed, 833169695Skan* If mix() is run forward or backward, at least 32 bits in a,b,c 834169695Skan have at least 1/4 probability of changing. 835169695Skan* If mix() is run forward, every bit of c will change between 1/3 and 836169695Skan 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.) 837169695Skanmix() was built out of 36 single-cycle latency instructions in a 838169695Skan structure that could supported 2x parallelism, like so: 839169695Skan a -= b; 840169695Skan a -= c; x = (c>>13); 841169695Skan b -= c; a ^= x; 842169695Skan b -= a; x = (a<<8); 843169695Skan c -= a; b ^= x; 844169695Skan c -= b; x = (b>>13); 845169695Skan ... 846169695Skan Unfortunately, superscalar Pentiums and Sparcs can't take advantage 847169695Skan of that parallelism. They've also turned some of those single-cycle 848169695Skan latency instructions into multi-cycle latency instructions. Still, 849169695Skan this is the fastest good hash I could find. There were about 2^^68 850169695Skan to choose from. I only looked at a billion or so. 851169695Skan-------------------------------------------------------------------- 852169695Skan*/ 853169695Skan/* same, but slower, works on systems that might have 8 byte hashval_t's */ 854169695Skan#define mix(a,b,c) \ 855169695Skan{ \ 856169695Skan a -= b; a -= c; a ^= (c>>13); \ 857169695Skan b -= c; b -= a; b ^= (a<< 8); \ 858169695Skan c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \ 859169695Skan a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \ 860169695Skan b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \ 861169695Skan c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \ 862169695Skan a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \ 863169695Skan b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \ 864169695Skan c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \ 865169695Skan} 866169695Skan 867169695Skan/* 868169695Skan-------------------------------------------------------------------- 869169695Skanhash() -- hash a variable-length key into a 32-bit value 870169695Skan k : the key (the unaligned variable-length array of bytes) 871169695Skan len : the length of the key, counting by bytes 872169695Skan level : can be any 4-byte value 873169695SkanReturns a 32-bit value. Every bit of the key affects every bit of 874169695Skanthe return value. Every 1-bit and 2-bit delta achieves avalanche. 875169695SkanAbout 36+6len instructions. 876169695Skan 877169695SkanThe best hash table sizes are powers of 2. There is no need to do 878169695Skanmod a prime (mod is sooo slow!). If you need less than 32 bits, 879169695Skanuse a bitmask. For example, if you need only 10 bits, do 880169695Skan h = (h & hashmask(10)); 881169695SkanIn which case, the hash table should have hashsize(10) elements. 882169695Skan 883169695SkanIf you are hashing n strings (ub1 **)k, do it like this: 884169695Skan for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h); 885169695Skan 886169695SkanBy Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this 887169695Skancode any way you wish, private, educational, or commercial. It's free. 888169695Skan 889169695SkanSee http://burtleburtle.net/bob/hash/evahash.html 890169695SkanUse for hash table lookup, or anything where one collision in 2^32 is 891169695Skanacceptable. Do NOT use for cryptographic purposes. 892169695Skan-------------------------------------------------------------------- 893169695Skan*/ 894169695Skan 895169695Skanhashval_t 896169695Skaniterative_hash (const PTR k_in /* the key */, 897169695Skan register size_t length /* the length of the key */, 898169695Skan register hashval_t initval /* the previous hash, or 899169695Skan an arbitrary value */) 900169695Skan{ 901169695Skan register const unsigned char *k = (const unsigned char *)k_in; 902169695Skan register hashval_t a,b,c,len; 903169695Skan 904169695Skan /* Set up the internal state */ 905169695Skan len = length; 906169695Skan a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ 907169695Skan c = initval; /* the previous hash value */ 908169695Skan 909169695Skan /*---------------------------------------- handle most of the key */ 910169695Skan#ifndef WORDS_BIGENDIAN 911169695Skan /* On a little-endian machine, if the data is 4-byte aligned we can hash 912169695Skan by word for better speed. This gives nondeterministic results on 913169695Skan big-endian machines. */ 914169695Skan if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0) 915169695Skan while (len >= 12) /* aligned */ 916169695Skan { 917169695Skan a += *(hashval_t *)(k+0); 918169695Skan b += *(hashval_t *)(k+4); 919169695Skan c += *(hashval_t *)(k+8); 920169695Skan mix(a,b,c); 921169695Skan k += 12; len -= 12; 922169695Skan } 923169695Skan else /* unaligned */ 924169695Skan#endif 925169695Skan while (len >= 12) 926169695Skan { 927169695Skan a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24)); 928169695Skan b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24)); 929169695Skan c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24)); 930169695Skan mix(a,b,c); 931169695Skan k += 12; len -= 12; 932169695Skan } 933169695Skan 934169695Skan /*------------------------------------- handle the last 11 bytes */ 935169695Skan c += length; 936169695Skan switch(len) /* all the case statements fall through */ 937169695Skan { 938169695Skan case 11: c+=((hashval_t)k[10]<<24); 939169695Skan case 10: c+=((hashval_t)k[9]<<16); 940169695Skan case 9 : c+=((hashval_t)k[8]<<8); 941169695Skan /* the first byte of c is reserved for the length */ 942169695Skan case 8 : b+=((hashval_t)k[7]<<24); 943169695Skan case 7 : b+=((hashval_t)k[6]<<16); 944169695Skan case 6 : b+=((hashval_t)k[5]<<8); 945169695Skan case 5 : b+=k[4]; 946169695Skan case 4 : a+=((hashval_t)k[3]<<24); 947169695Skan case 3 : a+=((hashval_t)k[2]<<16); 948169695Skan case 2 : a+=((hashval_t)k[1]<<8); 949169695Skan case 1 : a+=k[0]; 950169695Skan /* case 0: nothing left to add */ 951169695Skan } 952169695Skan mix(a,b,c); 953169695Skan /*-------------------------------------------- report the result */ 954169695Skan return c; 955169695Skan} 956