1/* hash - hashing table processing.
2
3   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006 Free
4   Software Foundation, Inc.
5
6   Written by Jim Meyering, 1992.
7
8   This program is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2, or (at your option)
11   any later version.
12
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with this program; if not, write to the Free Software Foundation,
20   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
21
22/* A generic hash table package.  */
23
24/* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
25   of malloc.  If you change USE_OBSTACK, you have to recompile!  */
26
27#include <config.h>
28
29#include "hash.h"
30#include "xalloc.h"
31
32#include <limits.h>
33#include <stdio.h>
34#include <stdlib.h>
35
36#if USE_OBSTACK
37# include "obstack.h"
38# ifndef obstack_chunk_alloc
39#  define obstack_chunk_alloc malloc
40# endif
41# ifndef obstack_chunk_free
42#  define obstack_chunk_free free
43# endif
44#endif
45
46#ifndef SIZE_MAX
47# define SIZE_MAX ((size_t) -1)
48#endif
49
/* Bookkeeping for one hash table: the bucket array, the usage counters,
   the tuning parameters, the user callbacks and the pool of recyclable
   overflow entries.  */
struct hash_table
  {
    /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
       for a possibility of N_BUCKETS.  Among those, N_BUCKETS_USED buckets
       are not empty, there are N_ENTRIES active entries in the table.
       A bucket whose DATA member is NULL is empty; the array element itself
       is the head of the chain, and further entries hang off its NEXT.  */
    struct hash_entry *bucket;
    struct hash_entry const *bucket_limit;
    size_t n_buckets;
    size_t n_buckets_used;
    size_t n_entries;

    /* Tuning arguments, kept in a physically separate structure.  This
       points either at the structure supplied by the user or at the
       file-level defaults.  */
    const Hash_tuning *tuning;

    /* Three functions are given to `hash_initialize', see the documentation
       block for this function.  In a word, HASHER randomizes a user entry
       into a number up from 0 up to some maximum minus 1; COMPARATOR returns
       true if two user entries compare equally; and DATA_FREER is the cleanup
       function for a user entry (it may be NULL).  */
    Hash_hasher hasher;
    Hash_comparator comparator;
    Hash_data_freer data_freer;

    /* A linked list of freed struct hash_entry structs, chained through
       their NEXT members and reused before new entries are allocated.  */
    struct hash_entry *free_entry_list;

#if USE_OBSTACK
    /* Whenever obstacks are used, it is possible to allocate all overflowed
       entries into a single stack, so they all can be freed in a single
       operation.  It is not clear if the speedup is worth the trouble.  */
    struct obstack entry_stack;
#endif
  };
83
/* A hash table contains many internal entries, each holding a pointer to
   some user-provided data (also called a user entry).  The word `entry' is
   used interchangeably for the internal entry and its associated user
   entry.  The contents of a user entry may be hashed by a randomization
   function (the hashing function, or just `hasher' for short) into a number
   (or `slot') from 0 up to the current table size minus 1.  At each slot
   position in the hash table starts a linked chain of entries whose user
   data all hash to this slot.  A bucket is the collection of all entries
   hashing to the same slot.
92
93   A good `hasher' function will distribute entries rather evenly in buckets.
94   In the ideal case, the length of each bucket is roughly the number of
95   entries divided by the table size.  Finding the slot for a data is usually
96   done in constant time by the `hasher', and the later finding of a precise
97   entry is linear in time with the size of the bucket.  Consequently, a
98   larger hash table size (that is, a larger number of buckets) is prone to
99   yielding shorter chains, *given* the `hasher' function behaves properly.
100
101   Long buckets slow down the lookup algorithm.  One might use big hash table
102   sizes in hope to reduce the average length of buckets, but this might
103   become inordinate, as unused slots in the hash table take some space.  The
104   best bet is to make sure you are using a good `hasher' function (beware
105   that those are not that easy to write! :-), and to use a table size
106   larger than the actual number of entries.  */
107
108/* If an insertion makes the ratio of nonempty buckets to table size larger
109   than the growth threshold (a number between 0.0 and 1.0), then increase
110   the table size by multiplying by the growth factor (a number greater than
111   1.0).  The growth threshold defaults to 0.8, and the growth factor
112   defaults to 1.414, meaning that the table will have doubled its size
113   every second time 80% of the buckets get used.  */
114#define DEFAULT_GROWTH_THRESHOLD 0.8
115#define DEFAULT_GROWTH_FACTOR 1.414
116
117/* If a deletion empties a bucket and causes the ratio of used buckets to
118   table size to become smaller than the shrink threshold (a number between
119   0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
120   number greater than the shrink threshold but smaller than 1.0).  The shrink
121   threshold and factor default to 0.0 and 1.0, meaning that the table never
122   shrinks.  */
123#define DEFAULT_SHRINK_THRESHOLD 0.0
124#define DEFAULT_SHRINK_FACTOR 1.0
125
/* Default tuning parameters.  `hash_reset_tuning' copies them into a
   caller's TUNING structure, and a table points at them when the user
   supplies no tuning or when the supplied tuning fails validation.
   The initializers are positional, so they must follow the member order
   of Hash_tuning (declared in hash.h, not shown here); the trailing
   `false' presumably initializes IS_N_BUCKETS -- confirm against hash.h.  */
static const Hash_tuning default_tuning =
  {
    DEFAULT_SHRINK_THRESHOLD,
    DEFAULT_SHRINK_FACTOR,
    DEFAULT_GROWTH_THRESHOLD,
    DEFAULT_GROWTH_FACTOR,
    false
  };
136
137/* Information and lookup.  */
138
139/* The following few functions provide information about the overall hash
140   table organization: the number of entries, number of buckets and maximum
141   length of buckets.  */
142
143/* Return the number of buckets in the hash table.  The table size, the total
144   number of buckets (used plus unused), or the maximum number of slots, are
145   the same quantity.  */
146
147size_t
148hash_get_n_buckets (const Hash_table *table)
149{
150  return table->n_buckets;
151}
152
153/* Return the number of slots in use (non-empty buckets).  */
154
155size_t
156hash_get_n_buckets_used (const Hash_table *table)
157{
158  return table->n_buckets_used;
159}
160
161/* Return the number of active entries.  */
162
163size_t
164hash_get_n_entries (const Hash_table *table)
165{
166  return table->n_entries;
167}
168
169/* Return the length of the longest chain (bucket).  */
170
171size_t
172hash_get_max_bucket_length (const Hash_table *table)
173{
174  struct hash_entry const *bucket;
175  size_t max_bucket_length = 0;
176
177  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
178    {
179      if (bucket->data)
180	{
181	  struct hash_entry const *cursor = bucket;
182	  size_t bucket_length = 1;
183
184	  while (cursor = cursor->next, cursor)
185	    bucket_length++;
186
187	  if (bucket_length > max_bucket_length)
188	    max_bucket_length = bucket_length;
189	}
190    }
191
192  return max_bucket_length;
193}
194
195/* Do a mild validation of a hash table, by traversing it and checking two
196   statistics.  */
197
198bool
199hash_table_ok (const Hash_table *table)
200{
201  struct hash_entry const *bucket;
202  size_t n_buckets_used = 0;
203  size_t n_entries = 0;
204
205  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
206    {
207      if (bucket->data)
208	{
209	  struct hash_entry const *cursor = bucket;
210
211	  /* Count bucket head.  */
212	  n_buckets_used++;
213	  n_entries++;
214
215	  /* Count bucket overflow.  */
216	  while (cursor = cursor->next, cursor)
217	    n_entries++;
218	}
219    }
220
221  if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
222    return true;
223
224  return false;
225}
226
227void
228hash_print_statistics (const Hash_table *table, FILE *stream)
229{
230  size_t n_entries = hash_get_n_entries (table);
231  size_t n_buckets = hash_get_n_buckets (table);
232  size_t n_buckets_used = hash_get_n_buckets_used (table);
233  size_t max_bucket_length = hash_get_max_bucket_length (table);
234
235  fprintf (stream, "# entries:         %lu\n", (unsigned long int) n_entries);
236  fprintf (stream, "# buckets:         %lu\n", (unsigned long int) n_buckets);
237  fprintf (stream, "# buckets used:    %lu (%.2f%%)\n",
238	   (unsigned long int) n_buckets_used,
239	   (100.0 * n_buckets_used) / n_buckets);
240  fprintf (stream, "max bucket length: %lu\n",
241	   (unsigned long int) max_bucket_length);
242}
243
244/* If ENTRY matches an entry already in the hash table, return the
245   entry from the table.  Otherwise, return NULL.  */
246
247void *
248hash_lookup (const Hash_table *table, const void *entry)
249{
250  struct hash_entry const *bucket
251    = table->bucket + table->hasher (entry, table->n_buckets);
252  struct hash_entry const *cursor;
253
254  if (! (bucket < table->bucket_limit))
255    abort ();
256
257  if (bucket->data == NULL)
258    return NULL;
259
260  for (cursor = bucket; cursor; cursor = cursor->next)
261    if (table->comparator (entry, cursor->data))
262      return cursor->data;
263
264  return NULL;
265}
266
267/* Walking.  */
268
269/* The functions in this page traverse the hash table and process the
270   contained entries.  For the traversal to work properly, the hash table
271   should not be resized nor modified while any particular entry is being
272   processed.  In particular, entries should not be added or removed.  */
273
274/* Return the first data in the table, or NULL if the table is empty.  */
275
276void *
277hash_get_first (const Hash_table *table)
278{
279  struct hash_entry const *bucket;
280
281  if (table->n_entries == 0)
282    return NULL;
283
284  for (bucket = table->bucket; ; bucket++)
285    if (! (bucket < table->bucket_limit))
286      abort ();
287    else if (bucket->data)
288      return bucket->data;
289}
290
291/* Return the user data for the entry following ENTRY, where ENTRY has been
292   returned by a previous call to either `hash_get_first' or `hash_get_next'.
293   Return NULL if there are no more entries.  */
294
295void *
296hash_get_next (const Hash_table *table, const void *entry)
297{
298  struct hash_entry const *bucket
299    = table->bucket + table->hasher (entry, table->n_buckets);
300  struct hash_entry const *cursor;
301
302  if (! (bucket < table->bucket_limit))
303    abort ();
304
305  /* Find next entry in the same bucket.  */
306  for (cursor = bucket; cursor; cursor = cursor->next)
307    if (cursor->data == entry && cursor->next)
308      return cursor->next->data;
309
310  /* Find first entry in any subsequent bucket.  */
311  while (++bucket < table->bucket_limit)
312    if (bucket->data)
313      return bucket->data;
314
315  /* None found.  */
316  return NULL;
317}
318
319/* Fill BUFFER with pointers to active user entries in the hash table, then
320   return the number of pointers copied.  Do not copy more than BUFFER_SIZE
321   pointers.  */
322
323size_t
324hash_get_entries (const Hash_table *table, void **buffer,
325		  size_t buffer_size)
326{
327  size_t counter = 0;
328  struct hash_entry const *bucket;
329  struct hash_entry const *cursor;
330
331  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
332    {
333      if (bucket->data)
334	{
335	  for (cursor = bucket; cursor; cursor = cursor->next)
336	    {
337	      if (counter >= buffer_size)
338		return counter;
339	      buffer[counter++] = cursor->data;
340	    }
341	}
342    }
343
344  return counter;
345}
346
347/* Call a PROCESSOR function for each entry of a hash table, and return the
348   number of entries for which the processor function returned success.  A
349   pointer to some PROCESSOR_DATA which will be made available to each call to
350   the processor function.  The PROCESSOR accepts two arguments: the first is
351   the user entry being walked into, the second is the value of PROCESSOR_DATA
352   as received.  The walking continue for as long as the PROCESSOR function
353   returns nonzero.  When it returns zero, the walking is interrupted.  */
354
355size_t
356hash_do_for_each (const Hash_table *table, Hash_processor processor,
357		  void *processor_data)
358{
359  size_t counter = 0;
360  struct hash_entry const *bucket;
361  struct hash_entry const *cursor;
362
363  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
364    {
365      if (bucket->data)
366	{
367	  for (cursor = bucket; cursor; cursor = cursor->next)
368	    {
369	      if (!(*processor) (cursor->data, processor_data))
370		return counter;
371	      counter++;
372	    }
373	}
374    }
375
376  return counter;
377}
378
379/* Allocation and clean-up.  */
380
/* Hash the NUL-terminated STRING into a slot number in the range
   0 .. N_BUCKETS-1.  N_BUCKETS must be nonzero, since it is used as a
   modulus.  This is a convenience routine on top of which other hashing
   functions can be built.  Two implementations are provided; USE_DIFF_HASH
   selects between them at compile time.  */

#if USE_DIFF_HASH

/* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
   B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
   Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
   algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
   may not be good for your application."  */

size_t
hash_string (const char *string, size_t n_buckets)
{
  /* Rotate the running value left by this many bits for each byte.  */
  enum { ROTATION = 7 };
  size_t const word_bits = sizeof (size_t) * CHAR_BIT;
  size_t accumulator = 0;
  const char *p = string;

  while (*p != '\0')
    {
      unsigned char byte = *p++;
      size_t rotated = (accumulator << ROTATION
                        | accumulator >> (word_bits - ROTATION));

      accumulator = byte + rotated;
    }

  /* Reduce only once, after the whole string has been folded in.  */
  return accumulator % n_buckets;
}

#else /* not USE_DIFF_HASH */

/* This one comes from `recode', and performs a bit better than the above as
   per a few experiments.  It is inspired from a hashing routine found in the
   very old Cyber `snoop', itself written in typical Greg Mansfield style.
   (By the way, what happened to this excellent man?  Is he still alive?)  */

size_t
hash_string (const char *string, size_t n_buckets)
{
  size_t accumulator = 0;
  const char *p;

  for (p = string; *p != '\0'; p++)
    {
      /* Bytes are taken as unsigned so that the result does not depend on
         the signedness of plain char.  */
      unsigned char byte = *p;

      /* Reduce at every step, keeping the running value below N_BUCKETS.  */
      accumulator = (accumulator * 31 + byte) % n_buckets;
    }

  return accumulator;
}

#endif /* not USE_DIFF_HASH */
430
/* Trial-division primality test.  CANDIDATE is expected to be odd and at
   least 11; only odd divisors starting at 3 are tried, so even inputs are
   not handled.  Return true when no divisor is found.  */

static bool
is_prime (size_t candidate)
{
  size_t divisor;
  size_t square;

  /* SQUARE tracks DIVISOR * DIVISOR without multiplying: stepping from the
     odd number d to d + 2 raises the square by 4 * (d + 1).  Stop once the
     square reaches CANDIDATE or a divisor is found.  */
  for (divisor = 3, square = 9;
       square < candidate && candidate % divisor != 0;
       divisor += 2)
    square += 4 * (divisor + 1);

  return candidate % divisor != 0;
}
449
/* Return the smallest prime that is not less than CANDIDATE, except that
   primes below 10 are never returned: any CANDIDATE under 10 is first
   raised to 10, so the smallest possible result is 11.  */

static size_t
next_prime (size_t candidate)
{
  /* Skip the small primes.  */
  size_t probe = candidate < 10 ? 10 : candidate;

  /* Force the starting point to be odd, then test odd numbers only.  */
  for (probe |= 1; !is_prime (probe); probe += 2)
    continue;

  return probe;
}
468
469void
470hash_reset_tuning (Hash_tuning *tuning)
471{
472  *tuning = default_tuning;
473}
474
475/* For the given hash TABLE, check the user supplied tuning structure for
476   reasonable values, and return true if there is no gross error with it.
477   Otherwise, definitively reset the TUNING field to some acceptable default
478   in the hash table (that is, the user loses the right of further modifying
479   tuning arguments), and return false.  */
480
481static bool
482check_tuning (Hash_table *table)
483{
484  const Hash_tuning *tuning = table->tuning;
485
486  /* Be a bit stricter than mathematics would require, so that
487     rounding errors in size calculations do not cause allocations to
488     fail to grow or shrink as they should.  The smallest allocation
489     is 11 (due to next_prime's algorithm), so an epsilon of 0.1
490     should be good enough.  */
491  float epsilon = 0.1f;
492
493  if (epsilon < tuning->growth_threshold
494      && tuning->growth_threshold < 1 - epsilon
495      && 1 + epsilon < tuning->growth_factor
496      && 0 <= tuning->shrink_threshold
497      && tuning->shrink_threshold + epsilon < tuning->shrink_factor
498      && tuning->shrink_factor <= 1
499      && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
500    return true;
501
502  table->tuning = &default_tuning;
503  return false;
504}
505
506/* Allocate and return a new hash table, or NULL upon failure.  The initial
507   number of buckets is automatically selected so as to _guarantee_ that you
508   may insert at least CANDIDATE different user entries before any growth of
509   the hash table size occurs.  So, if have a reasonably tight a-priori upper
510   bound on the number of entries you intend to insert in the hash table, you
511   may save some table memory and insertion time, by specifying it here.  If
512   the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
513   argument has its meaning changed to the wanted number of buckets.
514
515   TUNING points to a structure of user-supplied values, in case some fine
516   tuning is wanted over the default behavior of the hasher.  If TUNING is
517   NULL, the default tuning parameters are used instead.
518
519   The user-supplied HASHER function should be provided.  It accepts two
520   arguments ENTRY and TABLE_SIZE.  It computes, by hashing ENTRY contents, a
521   slot number for that entry which should be in the range 0..TABLE_SIZE-1.
522   This slot number is then returned.
523
524   The user-supplied COMPARATOR function should be provided.  It accepts two
525   arguments pointing to user data, it then returns true for a pair of entries
526   that compare equal, or false otherwise.  This function is internally called
527   on entries which are already known to hash to the same bucket index.
528
529   The user-supplied DATA_FREER function, when not NULL, may be later called
530   with the user data as an argument, just before the entry containing the
531   data gets freed.  This happens from within `hash_free' or `hash_clear'.
532   You should specify this function only if you want these functions to free
533   all of your `data' data.  This is typically the case when your data is
534   simply an auxiliary struct that you have malloc'd to aggregate several
535   values.  */
536
537Hash_table *
538hash_initialize (size_t candidate, const Hash_tuning *tuning,
539		 Hash_hasher hasher, Hash_comparator comparator,
540		 Hash_data_freer data_freer)
541{
542  Hash_table *table;
543
544  if (hasher == NULL || comparator == NULL)
545    return NULL;
546
547  table = malloc (sizeof *table);
548  if (table == NULL)
549    return NULL;
550
551  if (!tuning)
552    tuning = &default_tuning;
553  table->tuning = tuning;
554  if (!check_tuning (table))
555    {
556      /* Fail if the tuning options are invalid.  This is the only occasion
557	 when the user gets some feedback about it.  Once the table is created,
558	 if the user provides invalid tuning options, we silently revert to
559	 using the defaults, and ignore further request to change the tuning
560	 options.  */
561      goto fail;
562    }
563
564  if (!tuning->is_n_buckets)
565    {
566      float new_candidate = candidate / tuning->growth_threshold;
567      if (SIZE_MAX <= new_candidate)
568	goto fail;
569      candidate = new_candidate;
570    }
571
572  if (xalloc_oversized (candidate, sizeof *table->bucket))
573    goto fail;
574  table->n_buckets = next_prime (candidate);
575  if (xalloc_oversized (table->n_buckets, sizeof *table->bucket))
576    goto fail;
577
578  table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
579  table->bucket_limit = table->bucket + table->n_buckets;
580  table->n_buckets_used = 0;
581  table->n_entries = 0;
582
583  table->hasher = hasher;
584  table->comparator = comparator;
585  table->data_freer = data_freer;
586
587  table->free_entry_list = NULL;
588#if USE_OBSTACK
589  obstack_init (&table->entry_stack);
590#endif
591  return table;
592
593 fail:
594  free (table);
595  return NULL;
596}
597
598/* Make all buckets empty, placing any chained entries on the free list.
599   Apply the user-specified function data_freer (if any) to the datas of any
600   affected entries.  */
601
602void
603hash_clear (Hash_table *table)
604{
605  struct hash_entry *bucket;
606
607  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
608    {
609      if (bucket->data)
610	{
611	  struct hash_entry *cursor;
612	  struct hash_entry *next;
613
614	  /* Free the bucket overflow.  */
615	  for (cursor = bucket->next; cursor; cursor = next)
616	    {
617	      if (table->data_freer)
618		(*table->data_freer) (cursor->data);
619	      cursor->data = NULL;
620
621	      next = cursor->next;
622	      /* Relinking is done one entry at a time, as it is to be expected
623		 that overflows are either rare or short.  */
624	      cursor->next = table->free_entry_list;
625	      table->free_entry_list = cursor;
626	    }
627
628	  /* Free the bucket head.  */
629	  if (table->data_freer)
630	    (*table->data_freer) (bucket->data);
631	  bucket->data = NULL;
632	  bucket->next = NULL;
633	}
634    }
635
636  table->n_buckets_used = 0;
637  table->n_entries = 0;
638}
639
640/* Reclaim all storage associated with a hash table.  If a data_freer
641   function has been supplied by the user when the hash table was created,
642   this function applies it to the data of each entry before freeing that
643   entry.  */
644
645void
646hash_free (Hash_table *table)
647{
648  struct hash_entry *bucket;
649  struct hash_entry *cursor;
650  struct hash_entry *next;
651
652  /* Call the user data_freer function.  */
653  if (table->data_freer && table->n_entries)
654    {
655      for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
656	{
657	  if (bucket->data)
658	    {
659	      for (cursor = bucket; cursor; cursor = cursor->next)
660		{
661		  (*table->data_freer) (cursor->data);
662		}
663	    }
664	}
665    }
666
667#if USE_OBSTACK
668
669  obstack_free (&table->entry_stack, NULL);
670
671#else
672
673  /* Free all bucket overflowed entries.  */
674  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
675    {
676      for (cursor = bucket->next; cursor; cursor = next)
677	{
678	  next = cursor->next;
679	  free (cursor);
680	}
681    }
682
683  /* Also reclaim the internal list of previously freed entries.  */
684  for (cursor = table->free_entry_list; cursor; cursor = next)
685    {
686      next = cursor->next;
687      free (cursor);
688    }
689
690#endif
691
692  /* Free the remainder of the hash table structure.  */
693  free (table->bucket);
694  free (table);
695}
696
697/* Insertion and deletion.  */
698
699/* Get a new hash entry for a bucket overflow, possibly by reclying a
700   previously freed one.  If this is not possible, allocate a new one.  */
701
702static struct hash_entry *
703allocate_entry (Hash_table *table)
704{
705  struct hash_entry *new;
706
707  if (table->free_entry_list)
708    {
709      new = table->free_entry_list;
710      table->free_entry_list = new->next;
711    }
712  else
713    {
714#if USE_OBSTACK
715      new = obstack_alloc (&table->entry_stack, sizeof *new);
716#else
717      new = malloc (sizeof *new);
718#endif
719    }
720
721  return new;
722}
723
724/* Free a hash entry which was part of some bucket overflow,
725   saving it for later recycling.  */
726
727static void
728free_entry (Hash_table *table, struct hash_entry *entry)
729{
730  entry->data = NULL;
731  entry->next = table->free_entry_list;
732  table->free_entry_list = entry;
733}
734
735/* This private function is used to help with insertion and deletion.  When
736   ENTRY matches an entry in the table, return a pointer to the corresponding
737   user data and set *BUCKET_HEAD to the head of the selected bucket.
738   Otherwise, return NULL.  When DELETE is true and ENTRY matches an entry in
739   the table, unlink the matching entry.  */
740
741static void *
742hash_find_entry (Hash_table *table, const void *entry,
743		 struct hash_entry **bucket_head, bool delete)
744{
745  struct hash_entry *bucket
746    = table->bucket + table->hasher (entry, table->n_buckets);
747  struct hash_entry *cursor;
748
749  if (! (bucket < table->bucket_limit))
750    abort ();
751
752  *bucket_head = bucket;
753
754  /* Test for empty bucket.  */
755  if (bucket->data == NULL)
756    return NULL;
757
758  /* See if the entry is the first in the bucket.  */
759  if ((*table->comparator) (entry, bucket->data))
760    {
761      void *data = bucket->data;
762
763      if (delete)
764	{
765	  if (bucket->next)
766	    {
767	      struct hash_entry *next = bucket->next;
768
769	      /* Bump the first overflow entry into the bucket head, then save
770		 the previous first overflow entry for later recycling.  */
771	      *bucket = *next;
772	      free_entry (table, next);
773	    }
774	  else
775	    {
776	      bucket->data = NULL;
777	    }
778	}
779
780      return data;
781    }
782
783  /* Scan the bucket overflow.  */
784  for (cursor = bucket; cursor->next; cursor = cursor->next)
785    {
786      if ((*table->comparator) (entry, cursor->next->data))
787	{
788	  void *data = cursor->next->data;
789
790	  if (delete)
791	    {
792	      struct hash_entry *next = cursor->next;
793
794	      /* Unlink the entry to delete, then save the freed entry for later
795		 recycling.  */
796	      cursor->next = next->next;
797	      free_entry (table, next);
798	    }
799
800	  return data;
801	}
802    }
803
804  /* No entry found.  */
805  return NULL;
806}
807
808/* For an already existing hash table, change the number of buckets through
809   specifying CANDIDATE.  The contents of the hash table are preserved.  The
810   new number of buckets is automatically selected so as to _guarantee_ that
811   the table may receive at least CANDIDATE different user entries, including
812   those already in the table, before any other growth of the hash table size
813   occurs.  If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
814   exact number of buckets desired.  */
815
816bool
817hash_rehash (Hash_table *table, size_t candidate)
818{
819  Hash_table *new_table;
820  struct hash_entry *bucket;
821  struct hash_entry *cursor;
822  struct hash_entry *next;
823
824  new_table = hash_initialize (candidate, table->tuning, table->hasher,
825			       table->comparator, table->data_freer);
826  if (new_table == NULL)
827    return false;
828
829  /* Merely reuse the extra old space into the new table.  */
830#if USE_OBSTACK
831  obstack_free (&new_table->entry_stack, NULL);
832  new_table->entry_stack = table->entry_stack;
833#endif
834  new_table->free_entry_list = table->free_entry_list;
835
836  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
837    if (bucket->data)
838      for (cursor = bucket; cursor; cursor = next)
839	{
840	  void *data = cursor->data;
841	  struct hash_entry *new_bucket
842	    = (new_table->bucket
843	       + new_table->hasher (data, new_table->n_buckets));
844
845	  if (! (new_bucket < new_table->bucket_limit))
846	    abort ();
847
848	  next = cursor->next;
849
850	  if (new_bucket->data)
851	    {
852	      if (cursor == bucket)
853		{
854		  /* Allocate or recycle an entry, when moving from a bucket
855		     header into a bucket overflow.  */
856		  struct hash_entry *new_entry = allocate_entry (new_table);
857
858		  if (new_entry == NULL)
859		    return false;
860
861		  new_entry->data = data;
862		  new_entry->next = new_bucket->next;
863		  new_bucket->next = new_entry;
864		}
865	      else
866		{
867		  /* Merely relink an existing entry, when moving from a
868		     bucket overflow into a bucket overflow.  */
869		  cursor->next = new_bucket->next;
870		  new_bucket->next = cursor;
871		}
872	    }
873	  else
874	    {
875	      /* Free an existing entry, when moving from a bucket
876		 overflow into a bucket header.  Also take care of the
877		 simple case of moving from a bucket header into a bucket
878		 header.  */
879	      new_bucket->data = data;
880	      new_table->n_buckets_used++;
881	      if (cursor != bucket)
882		free_entry (new_table, cursor);
883	    }
884	}
885
886  free (table->bucket);
887  table->bucket = new_table->bucket;
888  table->bucket_limit = new_table->bucket_limit;
889  table->n_buckets = new_table->n_buckets;
890  table->n_buckets_used = new_table->n_buckets_used;
891  table->free_entry_list = new_table->free_entry_list;
892  /* table->n_entries already holds its value.  */
893#if USE_OBSTACK
894  table->entry_stack = new_table->entry_stack;
895#endif
896  free (new_table);
897
898  return true;
899}
900
901/* If ENTRY matches an entry already in the hash table, return the pointer
902   to the entry from the table.  Otherwise, insert ENTRY and return ENTRY.
903   Return NULL if the storage required for insertion cannot be allocated.  */
904
905void *
906hash_insert (Hash_table *table, const void *entry)
907{
908  void *data;
909  struct hash_entry *bucket;
910
911  /* The caller cannot insert a NULL entry.  */
912  if (! entry)
913    abort ();
914
915  /* If there's a matching entry already in the table, return that.  */
916  if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
917    return data;
918
919  /* ENTRY is not matched, it should be inserted.  */
920
921  if (bucket->data)
922    {
923      struct hash_entry *new_entry = allocate_entry (table);
924
925      if (new_entry == NULL)
926	return NULL;
927
928      /* Add ENTRY in the overflow of the bucket.  */
929
930      new_entry->data = (void *) entry;
931      new_entry->next = bucket->next;
932      bucket->next = new_entry;
933      table->n_entries++;
934      return (void *) entry;
935    }
936
937  /* Add ENTRY right in the bucket head.  */
938
939  bucket->data = (void *) entry;
940  table->n_entries++;
941  table->n_buckets_used++;
942
943  /* If the growth threshold of the buckets in use has been reached, increase
944     the table size and rehash.  There's no point in checking the number of
945     entries:  if the hashing function is ill-conditioned, rehashing is not
946     likely to improve it.  */
947
948  if (table->n_buckets_used
949      > table->tuning->growth_threshold * table->n_buckets)
950    {
951      /* Check more fully, before starting real work.  If tuning arguments
952	 became invalid, the second check will rely on proper defaults.  */
953      check_tuning (table);
954      if (table->n_buckets_used
955	  > table->tuning->growth_threshold * table->n_buckets)
956	{
957	  const Hash_tuning *tuning = table->tuning;
958	  float candidate =
959	    (tuning->is_n_buckets
960	     ? (table->n_buckets * tuning->growth_factor)
961	     : (table->n_buckets * tuning->growth_factor
962		* tuning->growth_threshold));
963
964	  if (SIZE_MAX <= candidate)
965	    return NULL;
966
967	  /* If the rehash fails, arrange to return NULL.  */
968	  if (!hash_rehash (table, candidate))
969	    entry = NULL;
970	}
971    }
972
973  return (void *) entry;
974}
975
976/* If ENTRY is already in the table, remove it and return the just-deleted
977   data (the user may want to deallocate its storage).  If ENTRY is not in the
978   table, don't modify the table and return NULL.  */
979
980void *
981hash_delete (Hash_table *table, const void *entry)
982{
983  void *data;
984  struct hash_entry *bucket;
985
986  data = hash_find_entry (table, entry, &bucket, true);
987  if (!data)
988    return NULL;
989
990  table->n_entries--;
991  if (!bucket->data)
992    {
993      table->n_buckets_used--;
994
995      /* If the shrink threshold of the buckets in use has been reached,
996	 rehash into a smaller table.  */
997
998      if (table->n_buckets_used
999	  < table->tuning->shrink_threshold * table->n_buckets)
1000	{
1001	  /* Check more fully, before starting real work.  If tuning arguments
1002	     became invalid, the second check will rely on proper defaults.  */
1003	  check_tuning (table);
1004	  if (table->n_buckets_used
1005	      < table->tuning->shrink_threshold * table->n_buckets)
1006	    {
1007	      const Hash_tuning *tuning = table->tuning;
1008	      size_t candidate =
1009		(tuning->is_n_buckets
1010		 ? table->n_buckets * tuning->shrink_factor
1011		 : (table->n_buckets * tuning->shrink_factor
1012		    * tuning->growth_threshold));
1013
1014	      hash_rehash (table, candidate);
1015	    }
1016	}
1017    }
1018
1019  return data;
1020}
1021
1022/* Testing.  */
1023
1024#if TESTING
1025
1026void
1027hash_print (const Hash_table *table)
1028{
1029  struct hash_entry const *bucket;
1030
1031  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
1032    {
1033      struct hash_entry *cursor;
1034
1035      if (bucket)
1036	printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));
1037
1038      for (cursor = bucket; cursor; cursor = cursor->next)
1039	{
1040	  char const *s = cursor->data;
1041	  /* FIXME */
1042	  if (s)
1043	    printf ("  %s\n", s);
1044	}
1045    }
1046}
1047
1048#endif /* TESTING */
1049