160484Sobrien/* An expandable hash tables datatype.
2218822Sdim   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
3218822Sdim   Free Software Foundation, Inc.
460484Sobrien   Contributed by Vladimir Makarov (vmakarov@cygnus.com).
560484Sobrien
660484SobrienThis file is part of the libiberty library.
760484SobrienLibiberty is free software; you can redistribute it and/or
860484Sobrienmodify it under the terms of the GNU Library General Public
960484SobrienLicense as published by the Free Software Foundation; either
1060484Sobrienversion 2 of the License, or (at your option) any later version.
1160484Sobrien
1260484SobrienLibiberty is distributed in the hope that it will be useful,
1360484Sobrienbut WITHOUT ANY WARRANTY; without even the implied warranty of
1460484SobrienMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
1560484SobrienLibrary General Public License for more details.
1660484Sobrien
1760484SobrienYou should have received a copy of the GNU Library General Public
1860484SobrienLicense along with libiberty; see the file COPYING.LIB.  If
19218822Sdimnot, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
20218822SdimBoston, MA 02110-1301, USA.  */
2160484Sobrien
2260484Sobrien/* This package implements basic hash table functionality.  It is possible
2360484Sobrien   to search for an entry, create an entry and destroy an entry.
2460484Sobrien
2560484Sobrien   Elements in the table are generic pointers.
2660484Sobrien
2760484Sobrien   The size of the table is not fixed; if the occupancy of the table
2860484Sobrien   grows too high the hash table will be expanded.
2960484Sobrien
3060484Sobrien   The abstract data implementation is based on generalized Algorithm D
3160484Sobrien   from Knuth's book "The art of computer programming".  Hash table is
3260484Sobrien   expanded by creation of new hash table and transferring elements from
3360484Sobrien   the old table to the new table. */
3460484Sobrien
3560484Sobrien#ifdef HAVE_CONFIG_H
3660484Sobrien#include "config.h"
3760484Sobrien#endif
3860484Sobrien
3960484Sobrien#include <sys/types.h>
4060484Sobrien
4160484Sobrien#ifdef HAVE_STDLIB_H
4260484Sobrien#include <stdlib.h>
4360484Sobrien#endif
4477298Sobrien#ifdef HAVE_STRING_H
4577298Sobrien#include <string.h>
4677298Sobrien#endif
47130561Sobrien#ifdef HAVE_MALLOC_H
48130561Sobrien#include <malloc.h>
49130561Sobrien#endif
50218822Sdim#ifdef HAVE_LIMITS_H
51218822Sdim#include <limits.h>
52218822Sdim#endif
53218822Sdim#ifdef HAVE_STDINT_H
54218822Sdim#include <stdint.h>
55218822Sdim#endif
56130561Sobrien
5760484Sobrien#include <stdio.h>
5860484Sobrien
5960484Sobrien#include "libiberty.h"
60218822Sdim#include "ansidecl.h"
6160484Sobrien#include "hashtab.h"
6260484Sobrien
63218822Sdim#ifndef CHAR_BIT
64218822Sdim#define CHAR_BIT 8
65218822Sdim#endif
6660484Sobrien
67218822Sdimstatic unsigned int higher_prime_index (unsigned long);
68218822Sdimstatic hashval_t htab_mod_1 (hashval_t, hashval_t, hashval_t, int);
69218822Sdimstatic hashval_t htab_mod (hashval_t, htab_t);
70218822Sdimstatic hashval_t htab_mod_m2 (hashval_t, htab_t);
71218822Sdimstatic hashval_t hash_pointer (const void *);
72218822Sdimstatic int eq_pointer (const void *, const void *);
73218822Sdimstatic int htab_expand (htab_t);
74218822Sdimstatic PTR *find_empty_slot_for_expand (htab_t, hashval_t);
7560484Sobrien
7677298Sobrien/* At some point, we could make these be NULL, and modify the
7777298Sobrien   hash-table routines to handle NULL specially; that would avoid
7877298Sobrien   function-call overhead for the common case of hashing pointers.  */
7977298Sobrienhtab_hash htab_hash_pointer = hash_pointer;
8077298Sobrienhtab_eq htab_eq_pointer = eq_pointer;
8177298Sobrien
82218822Sdim/* Table of primes and multiplicative inverses.
8377298Sobrien
84218822Sdim   Note that these are not minimally reduced inverses.  Unlike when generating
85218822Sdim   code to divide by a constant, we want to be able to use the same algorithm
86218822Sdim   all the time.  All of these inverses (are implied to) have bit 32 set.
87218822Sdim
88218822Sdim   For the record, here's the function that computed the table; it's a
89218822Sdim   vastly simplified version of the function of the same name from gcc.  */
90218822Sdim
91218822Sdim#if 0
92218822Sdimunsigned int
93218822Sdimceil_log2 (unsigned int x)
9460484Sobrien{
95218822Sdim  int i;
96218822Sdim  for (i = 31; i >= 0 ; --i)
97218822Sdim    if (x > (1u << i))
98218822Sdim      return i+1;
99218822Sdim  abort ();
100218822Sdim}
10160484Sobrien
102218822Sdimunsigned int
103218822Sdimchoose_multiplier (unsigned int d, unsigned int *mlp, unsigned char *shiftp)
104218822Sdim{
105218822Sdim  unsigned long long mhigh;
106218822Sdim  double nx;
107218822Sdim  int lgup, post_shift;
108218822Sdim  int pow, pow2;
109218822Sdim  int n = 32, precision = 32;
11060484Sobrien
111218822Sdim  lgup = ceil_log2 (d);
112218822Sdim  pow = n + lgup;
113218822Sdim  pow2 = n + lgup - precision;
114218822Sdim
115218822Sdim  nx = ldexp (1.0, pow) + ldexp (1.0, pow2);
116218822Sdim  mhigh = nx / d;
117218822Sdim
118218822Sdim  *shiftp = lgup - 1;
119218822Sdim  *mlp = mhigh;
120218822Sdim  return mhigh >> 32;
121218822Sdim}
122218822Sdim#endif
123218822Sdim
124218822Sdimstruct prime_ent
125218822Sdim{
126218822Sdim  hashval_t prime;
127218822Sdim  hashval_t inv;
128218822Sdim  hashval_t inv_m2;	/* inverse of prime-2 */
129218822Sdim  hashval_t shift;
130218822Sdim};
131218822Sdim
132218822Sdimstatic struct prime_ent const prime_tab[] = {
133218822Sdim  {          7, 0x24924925, 0x9999999b, 2 },
134218822Sdim  {         13, 0x3b13b13c, 0x745d1747, 3 },
135218822Sdim  {         31, 0x08421085, 0x1a7b9612, 4 },
136218822Sdim  {         61, 0x0c9714fc, 0x15b1e5f8, 5 },
137218822Sdim  {        127, 0x02040811, 0x0624dd30, 6 },
138218822Sdim  {        251, 0x05197f7e, 0x073260a5, 7 },
139218822Sdim  {        509, 0x01824366, 0x02864fc8, 8 },
140218822Sdim  {       1021, 0x00c0906d, 0x014191f7, 9 },
141218822Sdim  {       2039, 0x0121456f, 0x0161e69e, 10 },
142218822Sdim  {       4093, 0x00300902, 0x00501908, 11 },
143218822Sdim  {       8191, 0x00080041, 0x00180241, 12 },
144218822Sdim  {      16381, 0x000c0091, 0x00140191, 13 },
145218822Sdim  {      32749, 0x002605a5, 0x002a06e6, 14 },
146218822Sdim  {      65521, 0x000f00e2, 0x00110122, 15 },
147218822Sdim  {     131071, 0x00008001, 0x00018003, 16 },
148218822Sdim  {     262139, 0x00014002, 0x0001c004, 17 },
149218822Sdim  {     524287, 0x00002001, 0x00006001, 18 },
150218822Sdim  {    1048573, 0x00003001, 0x00005001, 19 },
151218822Sdim  {    2097143, 0x00004801, 0x00005801, 20 },
152218822Sdim  {    4194301, 0x00000c01, 0x00001401, 21 },
153218822Sdim  {    8388593, 0x00001e01, 0x00002201, 22 },
154218822Sdim  {   16777213, 0x00000301, 0x00000501, 23 },
155218822Sdim  {   33554393, 0x00001381, 0x00001481, 24 },
156218822Sdim  {   67108859, 0x00000141, 0x000001c1, 25 },
157218822Sdim  {  134217689, 0x000004e1, 0x00000521, 26 },
158218822Sdim  {  268435399, 0x00000391, 0x000003b1, 27 },
159218822Sdim  {  536870909, 0x00000019, 0x00000029, 28 },
160218822Sdim  { 1073741789, 0x0000008d, 0x00000095, 29 },
161218822Sdim  { 2147483647, 0x00000003, 0x00000007, 30 },
162218822Sdim  /* Avoid "decimal constant so large it is unsigned" for 4294967291.  */
163218822Sdim  { 0xfffffffb, 0x00000006, 0x00000008, 31 }
164218822Sdim};
165218822Sdim
166218822Sdim/* The following function returns an index into the above table of the
167218822Sdim   nearest prime number which is greater than N, and near a power of two. */
168218822Sdim
169218822Sdimstatic unsigned int
170218822Sdimhigher_prime_index (unsigned long n)
171218822Sdim{
172218822Sdim  unsigned int low = 0;
173218822Sdim  unsigned int high = sizeof(prime_tab) / sizeof(prime_tab[0]);
174218822Sdim
17577298Sobrien  while (low != high)
17660484Sobrien    {
177218822Sdim      unsigned int mid = low + (high - low) / 2;
178218822Sdim      if (n > prime_tab[mid].prime)
17977298Sobrien	low = mid + 1;
18077298Sobrien      else
18177298Sobrien	high = mid;
18260484Sobrien    }
18360484Sobrien
18477298Sobrien  /* If we've run out of primes, abort.  */
185218822Sdim  if (n > prime_tab[low].prime)
18677298Sobrien    {
18777298Sobrien      fprintf (stderr, "Cannot find prime bigger than %lu\n", n);
18877298Sobrien      abort ();
18977298Sobrien    }
19077298Sobrien
191218822Sdim  return low;
19260484Sobrien}
19360484Sobrien
19477298Sobrien/* Returns a hash code for P.  */
19577298Sobrien
19677298Sobrienstatic hashval_t
197218822Sdimhash_pointer (const PTR p)
19877298Sobrien{
19977298Sobrien  return (hashval_t) ((long)p >> 3);
20077298Sobrien}
20177298Sobrien
20277298Sobrien/* Returns non-zero if P1 and P2 are equal.  */
20377298Sobrien
20477298Sobrienstatic int
205218822Sdimeq_pointer (const PTR p1, const PTR p2)
20677298Sobrien{
20777298Sobrien  return p1 == p2;
20877298Sobrien}
20977298Sobrien
210218822Sdim
211218822Sdim/* The parens around the function names in the next two definitions
212218822Sdim   are essential in order to prevent macro expansions of the name.
213218822Sdim   The bodies, however, are expanded as expected, so they are not
214218822Sdim   recursive definitions.  */
215218822Sdim
216218822Sdim/* Return the current size of given hash table.  */
217218822Sdim
218218822Sdim#define htab_size(htab)  ((htab)->size)
219218822Sdim
220218822Sdimsize_t
221218822Sdim(htab_size) (htab_t htab)
222218822Sdim{
223218822Sdim  return htab_size (htab);
224218822Sdim}
225218822Sdim
226218822Sdim/* Return the current number of elements in given hash table. */
227218822Sdim
228218822Sdim#define htab_elements(htab)  ((htab)->n_elements - (htab)->n_deleted)
229218822Sdim
230218822Sdimsize_t
231218822Sdim(htab_elements) (htab_t htab)
232218822Sdim{
233218822Sdim  return htab_elements (htab);
234218822Sdim}
235218822Sdim
236218822Sdim/* Return X % Y.  */
237218822Sdim
238218822Sdimstatic inline hashval_t
239218822Sdimhtab_mod_1 (hashval_t x, hashval_t y, hashval_t inv, int shift)
240218822Sdim{
241218822Sdim  /* The multiplicative inverses computed above are for 32-bit types, and
242218822Sdim     requires that we be able to compute a highpart multiply.  */
243218822Sdim#ifdef UNSIGNED_64BIT_TYPE
244218822Sdim  __extension__ typedef UNSIGNED_64BIT_TYPE ull;
245218822Sdim  if (sizeof (hashval_t) * CHAR_BIT <= 32)
246218822Sdim    {
247218822Sdim      hashval_t t1, t2, t3, t4, q, r;
248218822Sdim
249218822Sdim      t1 = ((ull)x * inv) >> 32;
250218822Sdim      t2 = x - t1;
251218822Sdim      t3 = t2 >> 1;
252218822Sdim      t4 = t1 + t3;
253218822Sdim      q  = t4 >> shift;
254218822Sdim      r  = x - (q * y);
255218822Sdim
256218822Sdim      return r;
257218822Sdim    }
258218822Sdim#endif
259218822Sdim
260218822Sdim  /* Otherwise just use the native division routines.  */
261218822Sdim  return x % y;
262218822Sdim}
263218822Sdim
264218822Sdim/* Compute the primary hash for HASH given HTAB's current size.  */
265218822Sdim
266218822Sdimstatic inline hashval_t
267218822Sdimhtab_mod (hashval_t hash, htab_t htab)
268218822Sdim{
269218822Sdim  const struct prime_ent *p = &prime_tab[htab->size_prime_index];
270218822Sdim  return htab_mod_1 (hash, p->prime, p->inv, p->shift);
271218822Sdim}
272218822Sdim
273218822Sdim/* Compute the secondary hash for HASH given HTAB's current size.  */
274218822Sdim
275218822Sdimstatic inline hashval_t
276218822Sdimhtab_mod_m2 (hashval_t hash, htab_t htab)
277218822Sdim{
278218822Sdim  const struct prime_ent *p = &prime_tab[htab->size_prime_index];
279218822Sdim  return 1 + htab_mod_1 (hash, p->prime - 2, p->inv_m2, p->shift);
280218822Sdim}
281218822Sdim
28260484Sobrien/* This function creates table with length slightly longer than given
28360484Sobrien   source length.  Created hash table is initiated as empty (all the
284218822Sdim   hash table entries are HTAB_EMPTY_ENTRY).  The function returns the
285104834Sobrien   created hash table, or NULL if memory allocation fails.  */
28660484Sobrien
28760484Sobrienhtab_t
288218822Sdimhtab_create_alloc (size_t size, htab_hash hash_f, htab_eq eq_f,
289218822Sdim                   htab_del del_f, htab_alloc alloc_f, htab_free free_f)
29060484Sobrien{
29160484Sobrien  htab_t result;
292218822Sdim  unsigned int size_prime_index;
29360484Sobrien
294218822Sdim  size_prime_index = higher_prime_index (size);
295218822Sdim  size = prime_tab[size_prime_index].prime;
296218822Sdim
297104834Sobrien  result = (htab_t) (*alloc_f) (1, sizeof (struct htab));
298104834Sobrien  if (result == NULL)
299104834Sobrien    return NULL;
300104834Sobrien  result->entries = (PTR *) (*alloc_f) (size, sizeof (PTR));
301104834Sobrien  if (result->entries == NULL)
302104834Sobrien    {
303104834Sobrien      if (free_f != NULL)
304104834Sobrien	(*free_f) (result);
305104834Sobrien      return NULL;
306104834Sobrien    }
30760484Sobrien  result->size = size;
308218822Sdim  result->size_prime_index = size_prime_index;
30960484Sobrien  result->hash_f = hash_f;
31060484Sobrien  result->eq_f = eq_f;
31160484Sobrien  result->del_f = del_f;
312104834Sobrien  result->alloc_f = alloc_f;
313104834Sobrien  result->free_f = free_f;
31460484Sobrien  return result;
31560484Sobrien}
31660484Sobrien
317130561Sobrien/* As above, but use the variants of alloc_f and free_f which accept
318130561Sobrien   an extra argument.  */
319130561Sobrien
320130561Sobrienhtab_t
321218822Sdimhtab_create_alloc_ex (size_t size, htab_hash hash_f, htab_eq eq_f,
322218822Sdim                      htab_del del_f, void *alloc_arg,
323218822Sdim                      htab_alloc_with_arg alloc_f,
324218822Sdim		      htab_free_with_arg free_f)
325130561Sobrien{
326130561Sobrien  htab_t result;
327218822Sdim  unsigned int size_prime_index;
328130561Sobrien
329218822Sdim  size_prime_index = higher_prime_index (size);
330218822Sdim  size = prime_tab[size_prime_index].prime;
331218822Sdim
332130561Sobrien  result = (htab_t) (*alloc_f) (alloc_arg, 1, sizeof (struct htab));
333130561Sobrien  if (result == NULL)
334130561Sobrien    return NULL;
335130561Sobrien  result->entries = (PTR *) (*alloc_f) (alloc_arg, size, sizeof (PTR));
336130561Sobrien  if (result->entries == NULL)
337130561Sobrien    {
338130561Sobrien      if (free_f != NULL)
339130561Sobrien	(*free_f) (alloc_arg, result);
340130561Sobrien      return NULL;
341130561Sobrien    }
342130561Sobrien  result->size = size;
343218822Sdim  result->size_prime_index = size_prime_index;
344130561Sobrien  result->hash_f = hash_f;
345130561Sobrien  result->eq_f = eq_f;
346130561Sobrien  result->del_f = del_f;
347130561Sobrien  result->alloc_arg = alloc_arg;
348130561Sobrien  result->alloc_with_arg_f = alloc_f;
349130561Sobrien  result->free_with_arg_f = free_f;
350130561Sobrien  return result;
351130561Sobrien}
352130561Sobrien
353130561Sobrien/* Update the function pointers and allocation parameter in the htab_t.  */
354130561Sobrien
355130561Sobrienvoid
356218822Sdimhtab_set_functions_ex (htab_t htab, htab_hash hash_f, htab_eq eq_f,
357218822Sdim                       htab_del del_f, PTR alloc_arg,
358218822Sdim                       htab_alloc_with_arg alloc_f, htab_free_with_arg free_f)
359130561Sobrien{
360130561Sobrien  htab->hash_f = hash_f;
361130561Sobrien  htab->eq_f = eq_f;
362130561Sobrien  htab->del_f = del_f;
363130561Sobrien  htab->alloc_arg = alloc_arg;
364130561Sobrien  htab->alloc_with_arg_f = alloc_f;
365130561Sobrien  htab->free_with_arg_f = free_f;
366130561Sobrien}
367130561Sobrien
368104834Sobrien/* These functions exist solely for backward compatibility.  */
36977298Sobrien
370104834Sobrien#undef htab_create
37177298Sobrienhtab_t
372218822Sdimhtab_create (size_t size, htab_hash hash_f, htab_eq eq_f, htab_del del_f)
373104834Sobrien{
374104834Sobrien  return htab_create_alloc (size, hash_f, eq_f, del_f, xcalloc, free);
375104834Sobrien}
376104834Sobrien
377104834Sobrienhtab_t
378218822Sdimhtab_try_create (size_t size, htab_hash hash_f, htab_eq eq_f, htab_del del_f)
37977298Sobrien{
380104834Sobrien  return htab_create_alloc (size, hash_f, eq_f, del_f, calloc, free);
38177298Sobrien}
38277298Sobrien
38360484Sobrien/* This function frees all memory allocated for given hash table.
38460484Sobrien   Naturally the hash table must already exist. */
38560484Sobrien
38660484Sobrienvoid
387218822Sdimhtab_delete (htab_t htab)
38860484Sobrien{
389218822Sdim  size_t size = htab_size (htab);
390218822Sdim  PTR *entries = htab->entries;
39160484Sobrien  int i;
39277298Sobrien
39360484Sobrien  if (htab->del_f)
394218822Sdim    for (i = size - 1; i >= 0; i--)
395218822Sdim      if (entries[i] != HTAB_EMPTY_ENTRY && entries[i] != HTAB_DELETED_ENTRY)
396218822Sdim	(*htab->del_f) (entries[i]);
39760484Sobrien
398104834Sobrien  if (htab->free_f != NULL)
399104834Sobrien    {
400218822Sdim      (*htab->free_f) (entries);
401104834Sobrien      (*htab->free_f) (htab);
402104834Sobrien    }
403130561Sobrien  else if (htab->free_with_arg_f != NULL)
404130561Sobrien    {
405218822Sdim      (*htab->free_with_arg_f) (htab->alloc_arg, entries);
406130561Sobrien      (*htab->free_with_arg_f) (htab->alloc_arg, htab);
407130561Sobrien    }
40860484Sobrien}
40960484Sobrien
41060484Sobrien/* This function clears all entries in the given hash table.  */
41160484Sobrien
41260484Sobrienvoid
413218822Sdimhtab_empty (htab_t htab)
41460484Sobrien{
415218822Sdim  size_t size = htab_size (htab);
416218822Sdim  PTR *entries = htab->entries;
41760484Sobrien  int i;
41877298Sobrien
41960484Sobrien  if (htab->del_f)
420218822Sdim    for (i = size - 1; i >= 0; i--)
421218822Sdim      if (entries[i] != HTAB_EMPTY_ENTRY && entries[i] != HTAB_DELETED_ENTRY)
422218822Sdim	(*htab->del_f) (entries[i]);
42360484Sobrien
424218822Sdim  /* Instead of clearing megabyte, downsize the table.  */
425218822Sdim  if (size > 1024*1024 / sizeof (PTR))
426218822Sdim    {
427218822Sdim      int nindex = higher_prime_index (1024 / sizeof (PTR));
428218822Sdim      int nsize = prime_tab[nindex].prime;
429218822Sdim
430218822Sdim      if (htab->free_f != NULL)
431218822Sdim	(*htab->free_f) (htab->entries);
432218822Sdim      else if (htab->free_with_arg_f != NULL)
433218822Sdim	(*htab->free_with_arg_f) (htab->alloc_arg, htab->entries);
434218822Sdim      if (htab->alloc_with_arg_f != NULL)
435218822Sdim	htab->entries = (PTR *) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize,
436218822Sdim						           sizeof (PTR *));
437218822Sdim      else
438218822Sdim	htab->entries = (PTR *) (*htab->alloc_f) (nsize, sizeof (PTR *));
439218822Sdim     htab->size = nsize;
440218822Sdim     htab->size_prime_index = nindex;
441218822Sdim    }
442218822Sdim  else
443218822Sdim    memset (entries, 0, size * sizeof (PTR));
444218822Sdim  htab->n_deleted = 0;
445218822Sdim  htab->n_elements = 0;
44660484Sobrien}
44760484Sobrien
44860484Sobrien/* Similar to htab_find_slot, but without several unwanted side effects:
44960484Sobrien    - Does not call htab->eq_f when it finds an existing entry.
45060484Sobrien    - Does not change the count of elements/searches/collisions in the
45160484Sobrien      hash table.
45260484Sobrien   This function also assumes there are no deleted entries in the table.
45360484Sobrien   HASH is the hash value for the element to be inserted.  */
45477298Sobrien
45577298Sobrienstatic PTR *
456218822Sdimfind_empty_slot_for_expand (htab_t htab, hashval_t hash)
45760484Sobrien{
458218822Sdim  hashval_t index = htab_mod (hash, htab);
459218822Sdim  size_t size = htab_size (htab);
460104834Sobrien  PTR *slot = htab->entries + index;
461104834Sobrien  hashval_t hash2;
46260484Sobrien
463218822Sdim  if (*slot == HTAB_EMPTY_ENTRY)
464104834Sobrien    return slot;
465218822Sdim  else if (*slot == HTAB_DELETED_ENTRY)
466104834Sobrien    abort ();
467104834Sobrien
468218822Sdim  hash2 = htab_mod_m2 (hash, htab);
46960484Sobrien  for (;;)
47060484Sobrien    {
471104834Sobrien      index += hash2;
472104834Sobrien      if (index >= size)
473104834Sobrien	index -= size;
47477298Sobrien
475104834Sobrien      slot = htab->entries + index;
476218822Sdim      if (*slot == HTAB_EMPTY_ENTRY)
47760484Sobrien	return slot;
478218822Sdim      else if (*slot == HTAB_DELETED_ENTRY)
47960484Sobrien	abort ();
48060484Sobrien    }
48160484Sobrien}
48260484Sobrien
48360484Sobrien/* The following function changes size of memory allocated for the
48460484Sobrien   entries and repeatedly inserts the table elements.  The occupancy
48560484Sobrien   of the table after the call will be about 50%.  Naturally the hash
48660484Sobrien   table must already exist.  Remember also that the place of the
48777298Sobrien   table entries is changed.  If memory allocation failures are allowed,
48877298Sobrien   this function will return zero, indicating that the table could not be
48977298Sobrien   expanded.  If all goes well, it will return a non-zero value.  */
49060484Sobrien
49177298Sobrienstatic int
492218822Sdimhtab_expand (htab_t htab)
49360484Sobrien{
49477298Sobrien  PTR *oentries;
49577298Sobrien  PTR *olimit;
49677298Sobrien  PTR *p;
497104834Sobrien  PTR *nentries;
498218822Sdim  size_t nsize, osize, elts;
499218822Sdim  unsigned int oindex, nindex;
50060484Sobrien
50160484Sobrien  oentries = htab->entries;
502218822Sdim  oindex = htab->size_prime_index;
503218822Sdim  osize = htab->size;
504218822Sdim  olimit = oentries + osize;
505218822Sdim  elts = htab_elements (htab);
50660484Sobrien
507130561Sobrien  /* Resize only when table after removal of unused elements is either
508130561Sobrien     too full or too empty.  */
509218822Sdim  if (elts * 2 > osize || (elts * 8 < osize && osize > 32))
510218822Sdim    {
511218822Sdim      nindex = higher_prime_index (elts * 2);
512218822Sdim      nsize = prime_tab[nindex].prime;
513218822Sdim    }
514130561Sobrien  else
515218822Sdim    {
516218822Sdim      nindex = oindex;
517218822Sdim      nsize = osize;
518218822Sdim    }
51960484Sobrien
520130561Sobrien  if (htab->alloc_with_arg_f != NULL)
521130561Sobrien    nentries = (PTR *) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize,
522130561Sobrien						  sizeof (PTR *));
523130561Sobrien  else
524130561Sobrien    nentries = (PTR *) (*htab->alloc_f) (nsize, sizeof (PTR *));
525104834Sobrien  if (nentries == NULL)
526104834Sobrien    return 0;
527104834Sobrien  htab->entries = nentries;
528130561Sobrien  htab->size = nsize;
529218822Sdim  htab->size_prime_index = nindex;
53060484Sobrien  htab->n_elements -= htab->n_deleted;
53160484Sobrien  htab->n_deleted = 0;
53260484Sobrien
53360484Sobrien  p = oentries;
53460484Sobrien  do
53560484Sobrien    {
53677298Sobrien      PTR x = *p;
53777298Sobrien
538218822Sdim      if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
53960484Sobrien	{
54077298Sobrien	  PTR *q = find_empty_slot_for_expand (htab, (*htab->hash_f) (x));
54177298Sobrien
54260484Sobrien	  *q = x;
54360484Sobrien	}
54477298Sobrien
54560484Sobrien      p++;
54660484Sobrien    }
54760484Sobrien  while (p < olimit);
54877298Sobrien
549104834Sobrien  if (htab->free_f != NULL)
550104834Sobrien    (*htab->free_f) (oentries);
551130561Sobrien  else if (htab->free_with_arg_f != NULL)
552130561Sobrien    (*htab->free_with_arg_f) (htab->alloc_arg, oentries);
55377298Sobrien  return 1;
55460484Sobrien}
55560484Sobrien
55660484Sobrien/* This function searches for a hash table entry equal to the given
55760484Sobrien   element.  It cannot be used to insert or delete an element.  */
55860484Sobrien
55977298SobrienPTR
560218822Sdimhtab_find_with_hash (htab_t htab, const PTR element, hashval_t hash)
56160484Sobrien{
562218822Sdim  hashval_t index, hash2;
56360484Sobrien  size_t size;
56477298Sobrien  PTR entry;
56560484Sobrien
56660484Sobrien  htab->searches++;
567218822Sdim  size = htab_size (htab);
568218822Sdim  index = htab_mod (hash, htab);
56960484Sobrien
57077298Sobrien  entry = htab->entries[index];
571218822Sdim  if (entry == HTAB_EMPTY_ENTRY
572218822Sdim      || (entry != HTAB_DELETED_ENTRY && (*htab->eq_f) (entry, element)))
57377298Sobrien    return entry;
57477298Sobrien
575218822Sdim  hash2 = htab_mod_m2 (hash, htab);
57660484Sobrien  for (;;)
57760484Sobrien    {
57860484Sobrien      htab->collisions++;
57960484Sobrien      index += hash2;
58060484Sobrien      if (index >= size)
58160484Sobrien	index -= size;
58277298Sobrien
58377298Sobrien      entry = htab->entries[index];
584218822Sdim      if (entry == HTAB_EMPTY_ENTRY
585218822Sdim	  || (entry != HTAB_DELETED_ENTRY && (*htab->eq_f) (entry, element)))
58677298Sobrien	return entry;
58760484Sobrien    }
58860484Sobrien}
58960484Sobrien
59060484Sobrien/* Like htab_find_slot_with_hash, but compute the hash value from the
59160484Sobrien   element.  */
59277298Sobrien
59377298SobrienPTR
594218822Sdimhtab_find (htab_t htab, const PTR element)
59560484Sobrien{
59660484Sobrien  return htab_find_with_hash (htab, element, (*htab->hash_f) (element));
59760484Sobrien}
59860484Sobrien
59960484Sobrien/* This function searches for a hash table slot containing an entry
60060484Sobrien   equal to the given element.  To delete an entry, call this with
601218822Sdim   insert=NO_INSERT, then call htab_clear_slot on the slot returned
602218822Sdim   (possibly after doing some checks).  To insert an entry, call this
603218822Sdim   with insert=INSERT, then write the value you want into the returned
604218822Sdim   slot.  When inserting an entry, NULL may be returned if memory
605218822Sdim   allocation fails.  */
60660484Sobrien
60777298SobrienPTR *
608218822Sdimhtab_find_slot_with_hash (htab_t htab, const PTR element,
609218822Sdim                          hashval_t hash, enum insert_option insert)
61060484Sobrien{
61177298Sobrien  PTR *first_deleted_slot;
612218822Sdim  hashval_t index, hash2;
61360484Sobrien  size_t size;
614104834Sobrien  PTR entry;
61560484Sobrien
616218822Sdim  size = htab_size (htab);
617218822Sdim  if (insert == INSERT && size * 3 <= htab->n_elements * 4)
618218822Sdim    {
619218822Sdim      if (htab_expand (htab) == 0)
620218822Sdim	return NULL;
621218822Sdim      size = htab_size (htab);
622218822Sdim    }
62360484Sobrien
624218822Sdim  index = htab_mod (hash, htab);
62560484Sobrien
62660484Sobrien  htab->searches++;
62760484Sobrien  first_deleted_slot = NULL;
62860484Sobrien
629104834Sobrien  entry = htab->entries[index];
630218822Sdim  if (entry == HTAB_EMPTY_ENTRY)
631104834Sobrien    goto empty_entry;
632218822Sdim  else if (entry == HTAB_DELETED_ENTRY)
633104834Sobrien    first_deleted_slot = &htab->entries[index];
634104834Sobrien  else if ((*htab->eq_f) (entry, element))
635104834Sobrien    return &htab->entries[index];
636104834Sobrien
637218822Sdim  hash2 = htab_mod_m2 (hash, htab);
63860484Sobrien  for (;;)
63960484Sobrien    {
640104834Sobrien      htab->collisions++;
641104834Sobrien      index += hash2;
642104834Sobrien      if (index >= size)
643104834Sobrien	index -= size;
644104834Sobrien
645104834Sobrien      entry = htab->entries[index];
646218822Sdim      if (entry == HTAB_EMPTY_ENTRY)
647104834Sobrien	goto empty_entry;
648218822Sdim      else if (entry == HTAB_DELETED_ENTRY)
64960484Sobrien	{
65060484Sobrien	  if (!first_deleted_slot)
65160484Sobrien	    first_deleted_slot = &htab->entries[index];
65260484Sobrien	}
653104834Sobrien      else if ((*htab->eq_f) (entry, element))
65477298Sobrien	return &htab->entries[index];
65560484Sobrien    }
656104834Sobrien
657104834Sobrien empty_entry:
658104834Sobrien  if (insert == NO_INSERT)
659104834Sobrien    return NULL;
660104834Sobrien
661104834Sobrien  if (first_deleted_slot)
662104834Sobrien    {
663130561Sobrien      htab->n_deleted--;
664218822Sdim      *first_deleted_slot = HTAB_EMPTY_ENTRY;
665104834Sobrien      return first_deleted_slot;
666104834Sobrien    }
667104834Sobrien
668130561Sobrien  htab->n_elements++;
669104834Sobrien  return &htab->entries[index];
67060484Sobrien}
67160484Sobrien
67260484Sobrien/* Like htab_find_slot_with_hash, but compute the hash value from the
67360484Sobrien   element.  */
67477298Sobrien
67577298SobrienPTR *
676218822Sdimhtab_find_slot (htab_t htab, const PTR element, enum insert_option insert)
67760484Sobrien{
67860484Sobrien  return htab_find_slot_with_hash (htab, element, (*htab->hash_f) (element),
67960484Sobrien				   insert);
68060484Sobrien}
68160484Sobrien
68260484Sobrien/* This function deletes an element with the given value from hash
683218822Sdim   table (the hash is computed from the element).  If there is no matching
684218822Sdim   element in the hash table, this function does nothing.  */
685218822Sdim
686218822Sdimvoid
687218822Sdimhtab_remove_elt (htab_t htab, PTR element)
688218822Sdim{
689218822Sdim  htab_remove_elt_with_hash (htab, element, (*htab->hash_f) (element));
690218822Sdim}
691218822Sdim
692218822Sdim
693218822Sdim/* This function deletes an element with the given value from hash
69460484Sobrien   table.  If there is no matching element in the hash table, this
69560484Sobrien   function does nothing.  */
69660484Sobrien
69760484Sobrienvoid
698218822Sdimhtab_remove_elt_with_hash (htab_t htab, PTR element, hashval_t hash)
69960484Sobrien{
70077298Sobrien  PTR *slot;
70160484Sobrien
702218822Sdim  slot = htab_find_slot_with_hash (htab, element, hash, NO_INSERT);
703218822Sdim  if (*slot == HTAB_EMPTY_ENTRY)
70460484Sobrien    return;
70560484Sobrien
70660484Sobrien  if (htab->del_f)
70760484Sobrien    (*htab->del_f) (*slot);
70860484Sobrien
709218822Sdim  *slot = HTAB_DELETED_ENTRY;
71060484Sobrien  htab->n_deleted++;
71160484Sobrien}
71260484Sobrien
71360484Sobrien/* This function clears a specified slot in a hash table.  It is
71460484Sobrien   useful when you've already done the lookup and don't want to do it
71560484Sobrien   again.  */
71660484Sobrien
71760484Sobrienvoid
718218822Sdimhtab_clear_slot (htab_t htab, PTR *slot)
71960484Sobrien{
720218822Sdim  if (slot < htab->entries || slot >= htab->entries + htab_size (htab)
721218822Sdim      || *slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY)
72260484Sobrien    abort ();
72377298Sobrien
72460484Sobrien  if (htab->del_f)
72560484Sobrien    (*htab->del_f) (*slot);
72677298Sobrien
727218822Sdim  *slot = HTAB_DELETED_ENTRY;
72860484Sobrien  htab->n_deleted++;
72960484Sobrien}
73060484Sobrien
73160484Sobrien/* This function scans over the entire hash table calling
73260484Sobrien   CALLBACK for each live entry.  If CALLBACK returns false,
73360484Sobrien   the iteration stops.  INFO is passed as CALLBACK's second
73460484Sobrien   argument.  */
73560484Sobrien
73660484Sobrienvoid
737218822Sdimhtab_traverse_noresize (htab_t htab, htab_trav callback, PTR info)
73860484Sobrien{
739130561Sobrien  PTR *slot;
740130561Sobrien  PTR *limit;
741218822Sdim
742130561Sobrien  slot = htab->entries;
743218822Sdim  limit = slot + htab_size (htab);
744130561Sobrien
74560484Sobrien  do
74660484Sobrien    {
74777298Sobrien      PTR x = *slot;
74877298Sobrien
749218822Sdim      if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
75060484Sobrien	if (!(*callback) (slot, info))
75160484Sobrien	  break;
75260484Sobrien    }
75360484Sobrien  while (++slot < limit);
75460484Sobrien}
75560484Sobrien
756130561Sobrien/* Like htab_traverse_noresize, but does resize the table when it is
757130561Sobrien   too empty to improve effectivity of subsequent calls.  */
758130561Sobrien
759130561Sobrienvoid
760218822Sdimhtab_traverse (htab_t htab, htab_trav callback, PTR info)
761130561Sobrien{
762218822Sdim  if (htab_elements (htab) * 8 < htab_size (htab))
763130561Sobrien    htab_expand (htab);
764130561Sobrien
765130561Sobrien  htab_traverse_noresize (htab, callback, info);
766130561Sobrien}
767130561Sobrien
76877298Sobrien/* Return the fraction of fixed collisions during all work with given
76977298Sobrien   hash table. */
77060484Sobrien
77160484Sobriendouble
772218822Sdimhtab_collisions (htab_t htab)
77360484Sobrien{
77477298Sobrien  if (htab->searches == 0)
77577298Sobrien    return 0.0;
77660484Sobrien
77777298Sobrien  return (double) htab->collisions / (double) htab->searches;
77860484Sobrien}
77989857Sobrien
78089857Sobrien/* Hash P as a null-terminated string.
78189857Sobrien
78289857Sobrien   Copied from gcc/hashtable.c.  Zack had the following to say with respect
78389857Sobrien   to applicability, though note that unlike hashtable.c, this hash table
78489857Sobrien   implementation re-hashes rather than chain buckets.
78589857Sobrien
78689857Sobrien   http://gcc.gnu.org/ml/gcc-patches/2001-08/msg01021.html
78789857Sobrien   From: Zack Weinberg <zackw@panix.com>
78889857Sobrien   Date: Fri, 17 Aug 2001 02:15:56 -0400
78989857Sobrien
79089857Sobrien   I got it by extracting all the identifiers from all the source code
79189857Sobrien   I had lying around in mid-1999, and testing many recurrences of
79289857Sobrien   the form "H_n = H_{n-1} * K + c_n * L + M" where K, L, M were either
79389857Sobrien   prime numbers or the appropriate identity.  This was the best one.
79489857Sobrien   I don't remember exactly what constituted "best", except I was
79589857Sobrien   looking at bucket-length distributions mostly.
79689857Sobrien
79789857Sobrien   So it should be very good at hashing identifiers, but might not be
79889857Sobrien   as good at arbitrary strings.
79989857Sobrien
80089857Sobrien   I'll add that it thoroughly trounces the hash functions recommended
80189857Sobrien   for this use at http://burtleburtle.net/bob/hash/index.html, both
80289857Sobrien   on speed and bucket distribution.  I haven't tried it against the
80389857Sobrien   function they just started using for Perl's hashes.  */
80489857Sobrien
80589857Sobrienhashval_t
806218822Sdimhtab_hash_string (const PTR p)
80789857Sobrien{
80889857Sobrien  const unsigned char *str = (const unsigned char *) p;
80989857Sobrien  hashval_t r = 0;
81089857Sobrien  unsigned char c;
81189857Sobrien
81289857Sobrien  while ((c = *str++) != 0)
81389857Sobrien    r = r * 67 + c - 113;
81489857Sobrien
81589857Sobrien  return r;
81689857Sobrien}
817130561Sobrien
818130561Sobrien/* DERIVED FROM:
819130561Sobrien--------------------------------------------------------------------
820130561Sobrienlookup2.c, by Bob Jenkins, December 1996, Public Domain.
821130561Sobrienhash(), hash2(), hash3, and mix() are externally useful functions.
822130561SobrienRoutines to test the hash are included if SELF_TEST is defined.
823130561SobrienYou can use this free for any purpose.  It has no warranty.
824130561Sobrien--------------------------------------------------------------------
825130561Sobrien*/
826130561Sobrien
827130561Sobrien/*
828130561Sobrien--------------------------------------------------------------------
829130561Sobrienmix -- mix 3 32-bit values reversibly.
830130561SobrienFor every delta with one or two bit set, and the deltas of all three
831130561Sobrien  high bits or all three low bits, whether the original value of a,b,c
832130561Sobrien  is almost all zero or is uniformly distributed,
833130561Sobrien* If mix() is run forward or backward, at least 32 bits in a,b,c
834130561Sobrien  have at least 1/4 probability of changing.
835130561Sobrien* If mix() is run forward, every bit of c will change between 1/3 and
836130561Sobrien  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
837130561Sobrienmix() was built out of 36 single-cycle latency instructions in a
838130561Sobrien  structure that could supported 2x parallelism, like so:
839130561Sobrien      a -= b;
840130561Sobrien      a -= c; x = (c>>13);
841130561Sobrien      b -= c; a ^= x;
842130561Sobrien      b -= a; x = (a<<8);
843130561Sobrien      c -= a; b ^= x;
844130561Sobrien      c -= b; x = (b>>13);
845130561Sobrien      ...
846130561Sobrien  Unfortunately, superscalar Pentiums and Sparcs can't take advantage
847130561Sobrien  of that parallelism.  They've also turned some of those single-cycle
848130561Sobrien  latency instructions into multi-cycle latency instructions.  Still,
849130561Sobrien  this is the fastest good hash I could find.  There were about 2^^68
850130561Sobrien  to choose from.  I only looked at a billion or so.
851130561Sobrien--------------------------------------------------------------------
852130561Sobrien*/
853130561Sobrien/* same, but slower, works on systems that might have 8 byte hashval_t's */
854130561Sobrien#define mix(a,b,c) \
855130561Sobrien{ \
856130561Sobrien  a -= b; a -= c; a ^= (c>>13); \
857130561Sobrien  b -= c; b -= a; b ^= (a<< 8); \
858130561Sobrien  c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
859130561Sobrien  a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
860130561Sobrien  b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
861130561Sobrien  c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
862130561Sobrien  a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
863130561Sobrien  b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
864130561Sobrien  c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
865130561Sobrien}
866130561Sobrien
867130561Sobrien/*
868130561Sobrien--------------------------------------------------------------------
869130561Sobrienhash() -- hash a variable-length key into a 32-bit value
870130561Sobrien  k     : the key (the unaligned variable-length array of bytes)
871130561Sobrien  len   : the length of the key, counting by bytes
872130561Sobrien  level : can be any 4-byte value
873130561SobrienReturns a 32-bit value.  Every bit of the key affects every bit of
874130561Sobrienthe return value.  Every 1-bit and 2-bit delta achieves avalanche.
875130561SobrienAbout 36+6len instructions.
876130561Sobrien
877130561SobrienThe best hash table sizes are powers of 2.  There is no need to do
878130561Sobrienmod a prime (mod is sooo slow!).  If you need less than 32 bits,
879130561Sobrienuse a bitmask.  For example, if you need only 10 bits, do
880130561Sobrien  h = (h & hashmask(10));
881130561SobrienIn which case, the hash table should have hashsize(10) elements.
882130561Sobrien
883130561SobrienIf you are hashing n strings (ub1 **)k, do it like this:
884130561Sobrien  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
885130561Sobrien
886130561SobrienBy Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
887130561Sobriencode any way you wish, private, educational, or commercial.  It's free.
888130561Sobrien
889130561SobrienSee http://burtleburtle.net/bob/hash/evahash.html
890130561SobrienUse for hash table lookup, or anything where one collision in 2^32 is
891130561Sobrienacceptable.  Do NOT use for cryptographic purposes.
892130561Sobrien--------------------------------------------------------------------
893130561Sobrien*/
894130561Sobrien
895218822Sdimhashval_t
896218822Sdimiterative_hash (const PTR k_in /* the key */,
897218822Sdim                register size_t  length /* the length of the key */,
898218822Sdim                register hashval_t initval /* the previous hash, or
899218822Sdim                                              an arbitrary value */)
900130561Sobrien{
901130561Sobrien  register const unsigned char *k = (const unsigned char *)k_in;
902130561Sobrien  register hashval_t a,b,c,len;
903130561Sobrien
904130561Sobrien  /* Set up the internal state */
905130561Sobrien  len = length;
906130561Sobrien  a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
907130561Sobrien  c = initval;           /* the previous hash value */
908130561Sobrien
909130561Sobrien  /*---------------------------------------- handle most of the key */
910130561Sobrien#ifndef WORDS_BIGENDIAN
911130561Sobrien  /* On a little-endian machine, if the data is 4-byte aligned we can hash
912130561Sobrien     by word for better speed.  This gives nondeterministic results on
913130561Sobrien     big-endian machines.  */
914130561Sobrien  if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0)
915130561Sobrien    while (len >= 12)    /* aligned */
916130561Sobrien      {
917130561Sobrien	a += *(hashval_t *)(k+0);
918130561Sobrien	b += *(hashval_t *)(k+4);
919130561Sobrien	c += *(hashval_t *)(k+8);
920130561Sobrien	mix(a,b,c);
921130561Sobrien	k += 12; len -= 12;
922130561Sobrien      }
923130561Sobrien  else /* unaligned */
924130561Sobrien#endif
925130561Sobrien    while (len >= 12)
926130561Sobrien      {
927130561Sobrien	a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24));
928130561Sobrien	b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24));
929130561Sobrien	c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24));
930130561Sobrien	mix(a,b,c);
931130561Sobrien	k += 12; len -= 12;
932130561Sobrien      }
933130561Sobrien
934130561Sobrien  /*------------------------------------- handle the last 11 bytes */
935130561Sobrien  c += length;
936130561Sobrien  switch(len)              /* all the case statements fall through */
937130561Sobrien    {
938130561Sobrien    case 11: c+=((hashval_t)k[10]<<24);
939130561Sobrien    case 10: c+=((hashval_t)k[9]<<16);
940130561Sobrien    case 9 : c+=((hashval_t)k[8]<<8);
941130561Sobrien      /* the first byte of c is reserved for the length */
942130561Sobrien    case 8 : b+=((hashval_t)k[7]<<24);
943130561Sobrien    case 7 : b+=((hashval_t)k[6]<<16);
944130561Sobrien    case 6 : b+=((hashval_t)k[5]<<8);
945130561Sobrien    case 5 : b+=k[4];
946130561Sobrien    case 4 : a+=((hashval_t)k[3]<<24);
947130561Sobrien    case 3 : a+=((hashval_t)k[2]<<16);
948130561Sobrien    case 2 : a+=((hashval_t)k[1]<<8);
949130561Sobrien    case 1 : a+=k[0];
950130561Sobrien      /* case 0: nothing left to add */
951130561Sobrien    }
952130561Sobrien  mix(a,b,c);
953130561Sobrien  /*-------------------------------------------- report the result */
954130561Sobrien  return c;
955130561Sobrien}
956