1/* hash.c -- hash table maintenance
2   Copyright (C) 1995, 1999, 2002 Free Software Foundation, Inc.
3   Written by Greg McGary <gkm@gnu.org> <greg@mcgary.org>
4
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software
17   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18*/
19
20#include "make.h"
21#include "hash.h"
22
23#define	CALLOC(t, n) ((t *) calloc (sizeof (t), (n)))
24#define MALLOC(t, n) ((t *) xmalloc (sizeof (t) * (n)))
25#define REALLOC(o, t, n) ((t *) xrealloc ((o), sizeof (t) * (n)))
26#define CLONE(o, t, n) ((t *) memcpy (MALLOC (t, (n)), (o), sizeof (t) * (n)))
27
28static void hash_rehash __P((struct hash_table* ht));
29static unsigned long round_up_2 __P((unsigned long rough));
30
31/* Implement double hashing with open addressing.  The table size is
32   always a power of two.  The secondary (`increment') hash function
33   is forced to return an odd-value, in order to be relatively prime
34   to the table size.  This guarantees that the increment can
35   potentially hit every slot in the table during collision
36   resolution.  */
37
38void *hash_deleted_item = &hash_deleted_item;
39
40/* Force the table size to be a power of two, possibly rounding up the
41   given size.  */
42
43void
44hash_init (ht, size, hash_1, hash_2, hash_cmp)
45     struct hash_table* ht;
46     unsigned long size;
47     hash_func_t hash_1;
48     hash_func_t hash_2;
49     hash_cmp_func_t hash_cmp;
50{
51  ht->ht_size = round_up_2 (size);
52  ht->ht_empty_slots = ht->ht_size;
53  ht->ht_vec = (void**) CALLOC (struct token *, ht->ht_size);
54  if (ht->ht_vec == 0)
55    {
56      fprintf (stderr, _("can't allocate %ld bytes for hash table: memory exhausted"),
57	       ht->ht_size * sizeof(struct token *));
58      exit (1);
59    }
60
61  ht->ht_capacity = ht->ht_size - (ht->ht_size / 16); /* 93.75% loading factor */
62  ht->ht_fill = 0;
63  ht->ht_collisions = 0;
64  ht->ht_lookups = 0;
65  ht->ht_rehashes = 0;
66  ht->ht_hash_1 = hash_1;
67  ht->ht_hash_2 = hash_2;
68  ht->ht_compare = hash_cmp;
69}
70
71/* Load an array of items into `ht'.  */
72
73void
74hash_load (ht, item_table, cardinality, size)
75     struct hash_table* ht;
76     void *item_table;
77     unsigned long cardinality;
78     unsigned long size;
79{
80  char *items = (char *) item_table;
81  while (cardinality--)
82    {
83      hash_insert (ht, items);
84      items += size;
85    }
86}
87
88/* Returns the address of the table slot matching `key'.  If `key' is
89   not found, return the address of an empty slot suitable for
90   inserting `key'.  The caller is responsible for incrementing
91   ht_fill on insertion.  */
92
93void **
94hash_find_slot (ht, key)
95     struct hash_table* ht;
96     void const *key;
97{
98  void **slot;
99  void **deleted_slot = 0;
100  unsigned int hash_2 = 0;
101  unsigned int hash_1 = (*ht->ht_hash_1) (key);
102
103  ht->ht_lookups++;
104  for (;;)
105    {
106      hash_1 &= (ht->ht_size - 1);
107      slot = &ht->ht_vec[hash_1];
108
109      if (*slot == 0)
110	return (deleted_slot ? deleted_slot : slot);
111      if (*slot == hash_deleted_item)
112	{
113	  if (deleted_slot == 0)
114	    deleted_slot = slot;
115	}
116      else
117	{
118	  if (key == *slot)
119	    return slot;
120	  if ((*ht->ht_compare) (key, *slot) == 0)
121	    return slot;
122	  ht->ht_collisions++;
123	}
124      if (!hash_2)
125	  hash_2 = (*ht->ht_hash_2) (key) | 1;
126      hash_1 += hash_2;
127    }
128}
129
130void *
131hash_find_item (ht, key)
132     struct hash_table* ht;
133     void const *key;
134{
135  void **slot = hash_find_slot (ht, key);
136  return ((HASH_VACANT (*slot)) ? 0 : *slot);
137}
138
139void *
140hash_insert (ht, item)
141     struct hash_table* ht;
142     void *item;
143{
144  void **slot = hash_find_slot (ht, item);
145  void *old_item = slot ? *slot : 0;
146  hash_insert_at (ht, item, slot);
147  return ((HASH_VACANT (old_item)) ? 0 : old_item);
148}
149
150void *
151hash_insert_at (ht, item, slot)
152     struct hash_table* ht;
153     void *item;
154     void const *slot;
155{
156  void *old_item = *(void **) slot;
157  if (HASH_VACANT (old_item))
158    {
159      ht->ht_fill++;
160      if (old_item == 0)
161	ht->ht_empty_slots--;
162      old_item = item;
163    }
164  *(void const **) slot = item;
165  if (ht->ht_empty_slots < ht->ht_size - ht->ht_capacity)
166    {
167      hash_rehash (ht);
168      return (void *) hash_find_slot (ht, item);
169    }
170  else
171    return (void *) slot;
172}
173
174void *
175hash_delete (ht, item)
176     struct hash_table* ht;
177     void const *item;
178{
179  void **slot = hash_find_slot (ht, item);
180  return hash_delete_at (ht, slot);
181}
182
183void *
184hash_delete_at (ht, slot)
185     struct hash_table* ht;
186     void const *slot;
187{
188  void *item = *(void **) slot;
189  if (!HASH_VACANT (item))
190    {
191      *(void const **) slot = hash_deleted_item;
192      ht->ht_fill--;
193      return item;
194    }
195  else
196    return 0;
197}
198
199void
200hash_free_items (ht)
201     struct hash_table* ht;
202{
203  void **vec = ht->ht_vec;
204  void **end = &vec[ht->ht_size];
205  for (; vec < end; vec++)
206    {
207      void *item = *vec;
208      if (!HASH_VACANT (item))
209	free (item);
210      *vec = 0;
211    }
212  ht->ht_fill = 0;
213  ht->ht_empty_slots = ht->ht_size;
214}
215
216void
217hash_delete_items (ht)
218     struct hash_table* ht;
219{
220  void **vec = ht->ht_vec;
221  void **end = &vec[ht->ht_size];
222  for (; vec < end; vec++)
223    *vec = 0;
224  ht->ht_fill = 0;
225  ht->ht_collisions = 0;
226  ht->ht_lookups = 0;
227  ht->ht_rehashes = 0;
228  ht->ht_empty_slots = ht->ht_size;
229}
230
231void
232hash_free (ht, free_items)
233     struct hash_table* ht;
234     int free_items;
235{
236  if (free_items)
237    hash_free_items (ht);
238  else
239    {
240      ht->ht_fill = 0;
241      ht->ht_empty_slots = ht->ht_size;
242    }
243  free (ht->ht_vec);
244  ht->ht_vec = 0;
245  ht->ht_capacity = 0;
246}
247
248void
249hash_map (ht, map)
250     struct hash_table *ht;
251     hash_map_func_t map;
252{
253  void **slot;
254  void **end = &ht->ht_vec[ht->ht_size];
255
256  for (slot = ht->ht_vec; slot < end; slot++)
257    {
258      if (!HASH_VACANT (*slot))
259	(*map) (*slot);
260    }
261}
262
263void
264hash_map_arg (ht, map, arg)
265     struct hash_table *ht;
266     hash_map_arg_func_t map;
267     void *arg;
268{
269  void **slot;
270  void **end = &ht->ht_vec[ht->ht_size];
271
272  for (slot = ht->ht_vec; slot < end; slot++)
273    {
274      if (!HASH_VACANT (*slot))
275	(*map) (*slot, arg);
276    }
277}
278
279/* Double the size of the hash table in the event of overflow... */
280
281static void
282hash_rehash (ht)
283     struct hash_table* ht;
284{
285  unsigned long old_ht_size = ht->ht_size;
286  void **old_vec = ht->ht_vec;
287  void **ovp;
288
289  if (ht->ht_fill >= ht->ht_capacity)
290    {
291      ht->ht_size *= 2;
292      ht->ht_capacity = ht->ht_size - (ht->ht_size >> 4);
293    }
294  ht->ht_rehashes++;
295  ht->ht_vec = (void **) CALLOC (struct token *, ht->ht_size);
296
297  for (ovp = old_vec; ovp < &old_vec[old_ht_size]; ovp++)
298    {
299      if (! HASH_VACANT (*ovp))
300	{
301	  void **slot = hash_find_slot (ht, *ovp);
302	  *slot = *ovp;
303	}
304    }
305  ht->ht_empty_slots = ht->ht_size - ht->ht_fill;
306  free (old_vec);
307}
308
309void
310hash_print_stats (ht, out_FILE)
311     struct hash_table *ht;
312     FILE *out_FILE;
313{
314  /* GKM FIXME: honor NO_FLOAT */
315  fprintf (out_FILE, _("Load=%ld/%ld=%.0f%%, "), ht->ht_fill, ht->ht_size,
316	   100.0 * (double) ht->ht_fill / (double) ht->ht_size);
317  fprintf (out_FILE, _("Rehash=%d, "), ht->ht_rehashes);
318  fprintf (out_FILE, _("Collisions=%ld/%ld=%.0f%%"), ht->ht_collisions, ht->ht_lookups,
319	   (ht->ht_lookups
320	    ? (100.0 * (double) ht->ht_collisions / (double) ht->ht_lookups)
321	    : 0));
322}
323
324/* Dump all items into a NULL-terminated vector.  Use the
325   user-supplied vector, or malloc one.  */
326
327void **
328hash_dump (ht, vector_0, compare)
329     struct hash_table *ht;
330     void **vector_0;
331     qsort_cmp_t compare;
332{
333  void **vector;
334  void **slot;
335  void **end = &ht->ht_vec[ht->ht_size];
336
337  if (vector_0 == 0)
338    vector_0 = MALLOC (void *, ht->ht_fill + 1);
339  vector = vector_0;
340
341  for (slot = ht->ht_vec; slot < end; slot++)
342    if (!HASH_VACANT (*slot))
343      *vector++ = *slot;
344  *vector = 0;
345
346  if (compare)
347    qsort (vector_0, ht->ht_fill, sizeof (void *), compare);
348  return vector_0;
349}
350
351/* Round a given number up to the nearest power of 2. */
352
353static unsigned long
354round_up_2 (n)
355     unsigned long n;
356{
357  n |= (n >> 1);
358  n |= (n >> 2);
359  n |= (n >> 4);
360  n |= (n >> 8);
361  n |= (n >> 16);
362
363#if !defined(HAVE_LIMITS_H) || ULONG_MAX > 4294967295
364  /* We only need this on systems where unsigned long is >32 bits.  */
365  n |= (n >> 32);
366#endif
367
368  return n + 1;
369}
370