1/* hash - implement simple hashing table with string based keys.
2   Copyright (C) 1994-1995, 2000-2006 Free Software Foundation, Inc.
3   Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, October 1994.
4
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software Foundation,
17   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18
19#include <config.h>
20
21/* Specification.  */
22#include "hash.h"
23
24#include <stdlib.h>
25#include <string.h>
26#include <stdio.h>
27#include <limits.h>
28#include <sys/types.h>
29
30/* Since this simple implementation of hash tables allows only insertion, no
31   removal of entries, the right data structure for the memory holding all keys
32   is an obstack.  */
33#include "obstack.h"
34
35/* Use checked memory allocation.  */
36#include "xalloc.h"
37
38#define obstack_chunk_alloc xmalloc
39#define obstack_chunk_free free
40
41
42typedef struct hash_entry
43{
44  unsigned long used;  /* Hash code of the key, or 0 for an unused entry.  */
45  const void *key;     /* Key.  */
46  size_t keylen;
47  void *data;          /* Value.  */
48  struct hash_entry *next;
49}
50hash_entry;
51
52
53/* Given an odd CANDIDATE > 1, return true if it is a prime number.  */
54static int
55is_prime (unsigned long int candidate)
56{
57  /* No even number and none less than 10 will be passed here.  */
58  unsigned long int divn = 3;
59  unsigned long int sq = divn * divn;
60
61  while (sq < candidate && candidate % divn != 0)
62    {
63      ++divn;
64      sq += 4 * divn;
65      ++divn;
66    }
67
68  return candidate % divn != 0;
69}
70
71
72/* Given SEED > 1, return the smallest odd prime number >= SEED.  */
73unsigned long
74next_prime (unsigned long int seed)
75{
76  /* Make it definitely odd.  */
77  seed |= 1;
78
79  while (!is_prime (seed))
80    seed += 2;
81
82  return seed;
83}
84
85
86/* Initialize a hash table.  INIT_SIZE > 1 is the initial number of available
87   entries.
88   Return 0 upon successful completion, -1 upon memory allocation error.  */
89int
90hash_init (hash_table *htab, unsigned long int init_size)
91{
92  /* We need the size to be a prime.  */
93  init_size = next_prime (init_size);
94
95  /* Initialize the data structure.  */
96  htab->size = init_size;
97  htab->filled = 0;
98  htab->first = NULL;
99  htab->table = (hash_entry *) xcalloc (init_size + 1, sizeof (hash_entry));
100
101  obstack_init (&htab->mem_pool);
102
103  return 0;
104}
105
106
107/* Delete a hash table's contents.
108   Return 0 always.  */
109int
110hash_destroy (hash_table *htab)
111{
112  free (htab->table);
113  obstack_free (&htab->mem_pool, NULL);
114  return 0;
115}
116
117
118/* Compute a hash code for a key consisting of KEYLEN bytes starting at KEY
119   in memory.  */
120static unsigned long
121compute_hashval (const void *key, size_t keylen)
122{
123  size_t cnt;
124  unsigned long int hval;
125
126  /* Compute the hash value for the given string.  The algorithm
127     is taken from [Aho,Sethi,Ullman], fixed according to
128     http://www.haible.de/bruno/hashfunc.html.  */
129  cnt = 0;
130  hval = keylen;
131  while (cnt < keylen)
132    {
133      hval = (hval << 9) | (hval >> (sizeof (unsigned long) * CHAR_BIT - 9));
134      hval += (unsigned long int) *(((const char *) key) + cnt++);
135    }
136  return hval != 0 ? hval : ~((unsigned long) 0);
137}
138
139
140/* References:
141   [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
142   [Knuth]	      The Art of Computer Programming, part3 (6.4) */
143
144/* Look up a given key in the hash table.
145   Return the index of the entry, if present, or otherwise the index a free
146   entry where it could be inserted.  */
147static size_t
148lookup (hash_table *htab,
149	const void *key, size_t keylen,
150	unsigned long int hval)
151{
152  unsigned long int hash;
153  size_t idx;
154  hash_entry *table = htab->table;
155
156  /* First hash function: simply take the modul but prevent zero.  */
157  hash = 1 + hval % htab->size;
158
159  idx = hash;
160
161  if (table[idx].used)
162    {
163      if (table[idx].used == hval && table[idx].keylen == keylen
164	  && memcmp (table[idx].key, key, keylen) == 0)
165	return idx;
166
167      /* Second hash function as suggested in [Knuth].  */
168      hash = 1 + hval % (htab->size - 2);
169
170      do
171	{
172	  if (idx <= hash)
173	    idx = htab->size + idx - hash;
174	  else
175	    idx -= hash;
176
177	  /* If entry is found use it.  */
178	  if (table[idx].used == hval && table[idx].keylen == keylen
179	      && memcmp (table[idx].key, key, keylen) == 0)
180	    return idx;
181	}
182      while (table[idx].used);
183    }
184  return idx;
185}
186
187
188/* Look up the value of a key in the given table.
189   If found, return 0 and set *RESULT to it.  Otherwise return -1.  */
190int
191hash_find_entry (hash_table *htab, const void *key, size_t keylen,
192		 void **result)
193{
194  hash_entry *table = htab->table;
195  size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen));
196
197  if (table[idx].used == 0)
198    return -1;
199
200  *result = table[idx].data;
201  return 0;
202}
203
204
205/* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table at index IDX.
206   HVAL is the key's hash code.  IDX depends on it.  The table entry at index
207   IDX is known to be unused.  */
208static void
209insert_entry_2 (hash_table *htab,
210		const void *key, size_t keylen,
211		unsigned long int hval, size_t idx, void *data)
212{
213  hash_entry *table = htab->table;
214
215  table[idx].used = hval;
216  table[idx].key = key;
217  table[idx].keylen = keylen;
218  table[idx].data = data;
219
220  /* List the new value in the list.  */
221  if (htab->first == NULL)
222    {
223      table[idx].next = &table[idx];
224      htab->first = &table[idx];
225    }
226  else
227    {
228      table[idx].next = htab->first->next;
229      htab->first->next = &table[idx];
230      htab->first = &table[idx];
231    }
232
233  ++htab->filled;
234}
235
236
237/* Grow the hash table.  */
238static void
239resize (hash_table *htab)
240{
241  unsigned long int old_size = htab->size;
242  hash_entry *table = htab->table;
243  size_t idx;
244
245  htab->size = next_prime (htab->size * 2);
246  htab->filled = 0;
247  htab->first = NULL;
248  htab->table = (hash_entry *) xcalloc (1 + htab->size, sizeof (hash_entry));
249
250  for (idx = 1; idx <= old_size; ++idx)
251    if (table[idx].used)
252      insert_entry_2 (htab, table[idx].key, table[idx].keylen,
253		      table[idx].used,
254		      lookup (htab, table[idx].key, table[idx].keylen,
255			      table[idx].used),
256		      table[idx].data);
257
258  free (table);
259}
260
261
262/* Try to insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table.
263   Return non-NULL (more precisely, the address of the KEY inside the table's
264   memory pool) if successful, or NULL if there is already an entry with the
265   given key.  */
266const void *
267hash_insert_entry (hash_table *htab,
268		   const void *key, size_t keylen,
269		   void *data)
270{
271  unsigned long int hval = compute_hashval (key, keylen);
272  hash_entry *table = htab->table;
273  size_t idx = lookup (htab, key, keylen, hval);
274
275  if (table[idx].used)
276    /* We don't want to overwrite the old value.  */
277    return NULL;
278  else
279    {
280      /* An empty bucket has been found.  */
281      void *keycopy = obstack_copy (&htab->mem_pool, key, keylen);
282      insert_entry_2 (htab, keycopy, keylen, hval, idx, data);
283      if (100 * htab->filled > 75 * htab->size)
284	/* Table is filled more than 75%.  Resize the table.  */
285	resize (htab);
286      return keycopy;
287    }
288}
289
290
291/* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table.
292   Return 0.  */
293int
294hash_set_value (hash_table *htab,
295		const void *key, size_t keylen,
296		void *data)
297{
298  unsigned long int hval = compute_hashval (key, keylen);
299  hash_entry *table = htab->table;
300  size_t idx = lookup (htab, key, keylen, hval);
301
302  if (table[idx].used)
303    {
304      /* Overwrite the old value.  */
305      table[idx].data = data;
306      return 0;
307    }
308  else
309    {
310      /* An empty bucket has been found.  */
311      void *keycopy = obstack_copy (&htab->mem_pool, key, keylen);
312      insert_entry_2 (htab, keycopy, keylen, hval, idx, data);
313      if (100 * htab->filled > 75 * htab->size)
314	/* Table is filled more than 75%.  Resize the table.  */
315	resize (htab);
316      return 0;
317    }
318}
319
320
321/* Steps *PTR forward to the next used entry in the given hash table.  *PTR
322   should be initially set to NULL.  Store information about the next entry
323   in *KEY, *KEYLEN, *DATA.
324   Return 0 normally, -1 when the whole hash table has been traversed.  */
325int
326hash_iterate (hash_table *htab, void **ptr, const void **key, size_t *keylen,
327	      void **data)
328{
329  hash_entry *curr;
330
331  if (*ptr == NULL)
332    {
333      if (htab->first == NULL)
334	return -1;
335      curr = htab->first;
336    }
337  else
338    {
339      if (*ptr == htab->first)
340	return -1;
341      curr = (hash_entry *) *ptr;
342    }
343  curr = curr->next;
344  *ptr = (void *) curr;
345
346  *key = curr->key;
347  *keylen = curr->keylen;
348  *data = curr->data;
349  return 0;
350}
351
352
353/* Steps *PTR forward to the next used entry in the given hash table.  *PTR
354   should be initially set to NULL.  Store information about the next entry
355   in *KEY, *KEYLEN, *DATAP.  *DATAP is set to point to the storage of the
356   value; modifying **DATAP will modify the value of the entry.
357   Return 0 normally, -1 when the whole hash table has been traversed.  */
358int
359hash_iterate_modify (hash_table *htab, void **ptr,
360		     const void **key, size_t *keylen,
361		     void ***datap)
362{
363  hash_entry *curr;
364
365  if (*ptr == NULL)
366    {
367      if (htab->first == NULL)
368	return -1;
369      curr = htab->first;
370    }
371  else
372    {
373      if (*ptr == htab->first)
374	return -1;
375      curr = (hash_entry *) *ptr;
376    }
377  curr = curr->next;
378  *ptr = (void *) curr;
379
380  *key = curr->key;
381  *keylen = curr->keylen;
382  *datap = &curr->data;
383  return 0;
384}
385