1/* Hash tables.
2   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
3   2009 Free Software Foundation, Inc.
4
5This file is part of GNU Wget.
6
7GNU Wget is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3 of the License, or (at
10your option) any later version.
11
12GNU Wget is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with Wget.  If not, see <http://www.gnu.org/licenses/>.
19
20Additional permission under GNU GPL version 3 section 7
21
22If you modify this program, or any covered work, by linking or
23combining it with the OpenSSL project's OpenSSL library (or a
24modified version of that library), containing parts covered by the
25terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26grants you additional permission to convey the resulting work.
27Corresponding Source for a non-source form of such a combination
28shall include the source code for the parts of OpenSSL used as well
29as that of the covered work.  */
30
31/* With -DSTANDALONE, this file can be compiled outside Wget source
32   tree.  To test, also use -DTEST.  */
33
34#ifndef STANDALONE
35# include "wget.h"
36#endif
37
38#include <stdio.h>
39#include <stdlib.h>
40#include <assert.h>
41#include <string.h>
42#include <limits.h>
43
44#ifndef STANDALONE
45/* Get Wget's utility headers. */
46# include "utils.h"
47#else
48/* Make do without them. */
49# define xnew(x) xmalloc (sizeof (x))
50# define xnew_array(type, x) xmalloc (sizeof (type) * (x))
51# define xmalloc malloc
52# define xfree free
53# ifndef countof
54#  define countof(x) (sizeof (x) / sizeof ((x)[0]))
55# endif
56# include <ctype.h>
57# define c_tolower(x) tolower ((unsigned char) (x))
58# ifdef HAVE_STDINT_H
59#  include <stdint.h>
60# else
61   typedef unsigned long uintptr_t;
62# endif
63#endif
64
65#include "hash.h"
66
67/* INTERFACE:
68
69   Hash tables are a technique used to implement mapping between
70   objects with near-constant-time access and storage.  The table
71   associates keys to values, and a value can be very quickly
72   retrieved by providing the key.  Fast lookup tables are typically
73   implemented as hash tables.
74
75   The entry points are
76     hash_table_new       -- creates the table.
77     hash_table_destroy   -- destroys the table.
78     hash_table_put       -- establishes or updates key->value mapping.
79     hash_table_get       -- retrieves value of key.
80     hash_table_get_pair  -- get key/value pair for key.
81     hash_table_contains  -- test whether the table contains key.
82     hash_table_remove    -- remove key->value mapping for given key.
83     hash_table_for_each  -- call function for each table entry.
84     hash_table_iterate   -- iterate over entries in hash table.
85     hash_table_iter_next -- return next element during iteration.
86     hash_table_clear     -- clear hash table contents.
87     hash_table_count     -- return the number of entries in the table.
88
89   The hash table grows internally as new entries are added and is not
90   limited in size, except by available memory.  The table doubles
91   with each resize, which ensures that the amortized time per
92   operation remains constant.
93
94   If not instructed otherwise, tables created by hash_table_new
95   consider the keys to be equal if their pointer values are the same.
96   You can use make_string_hash_table to create tables whose keys are
97   considered equal if their string contents are the same.  In the
98   general case, the criterion of equality used to compare keys is
99   specified at table creation time with two callback functions,
100   "hash" and "test".  The hash function transforms the key into an
101   arbitrary number that must be the same for two equal keys.  The
102   test function accepts two keys and returns non-zero if they are to
103   be considered equal.
104
105   Note that neither keys nor values are copied when inserted into the
106   hash table, so they must exist for the lifetime of the table.  This
107   means that e.g. the use of static strings is OK, but objects with a
108   shorter life-time probably need to be copied (with strdup() or the
109   like in the case of strings) before being inserted.  */
110
111/* IMPLEMENTATION:
112
113   The hash table is implemented as an open-addressed table with
114   linear probing collision resolution.
115
116   The above means that all the cells (each cell containing a key and
117   a value pointer) are stored in a contiguous array.  Array position
118   of each cell is determined by the hash value of its key and the
119   size of the table: location := hash(key) % size.  If two different
120   keys end up on the same position (collide), the one that came
121   second is stored in the first unoccupied cell that follows it.
122   This collision resolution technique is called "linear probing".
123
124   There are more advanced collision resolution methods (quadratic
125   probing, double hashing), but we don't use them because they incur
126   more non-sequential access to the array, which results in worse CPU
127   cache behavior.  Linear probing works well as long as the
128   count/size ratio (fullness) is kept below 75%.  We make sure to
129   grow and rehash the table whenever this threshold is exceeded.
130
131   Collisions complicate deletion because simply clearing a cell
132   followed by previously collided entries would cause those neighbors
133   to not be picked up by find_cell later.  One solution is to leave a
134   "tombstone" marker instead of clearing the cell, and another is to
135   recalculate the positions of adjacent cells.  We take the latter
136   approach because it results in less bookkeeping garbage and faster
137   retrieval at the (slight) expense of deletion.  */
138
139/* Maximum allowed fullness: when hash table's fullness exceeds this
140   value, the table is resized.  */
141#define HASH_MAX_FULLNESS 0.75
142
143/* The hash table size is multiplied by this factor (and then rounded
144   to the next prime) with each resize.  This guarantees infrequent
145   resizes.  */
146#define HASH_RESIZE_FACTOR 2
147
148struct cell {
149  void *key;
150  void *value;
151};
152
153typedef unsigned long (*hashfun_t) (const void *);
154typedef int (*testfun_t) (const void *, const void *);
155
156struct hash_table {
157  hashfun_t hash_function;
158  testfun_t test_function;
159
160  struct cell *cells;           /* contiguous array of cells. */
161  int size;                     /* size of the array. */
162
163  int count;                    /* number of occupied entries. */
164  int resize_threshold;         /* after size exceeds this number of
165                                   entries, resize the table.  */
166  int prime_offset;             /* the offset of the current prime in
167                                   the prime table. */
168};
169
170/* We use the all-bits-set constant (INVALID_PTR) marker to mean that
171   a cell is empty.  It is unaligned and therefore illegal as a
172   pointer.  INVALID_PTR_CHAR (0xff) is the single-character constant
173   used to initialize the entire cells array as empty.
174
175   The all-bits-set value is a better choice than NULL because it
176   allows the use of NULL/0 keys.  Since the keys are either integers
177   or pointers, the only key that cannot be used is the integer value
178   -1.  This is acceptable because it still allows the use of
179   nonnegative integer keys.  */
180
181#define INVALID_PTR ((void *) ~(uintptr_t) 0)
182#ifndef UCHAR_MAX
183# define UCHAR_MAX 0xff
184#endif
185#define INVALID_PTR_CHAR UCHAR_MAX
186
187/* Whether the cell C is occupied (non-empty). */
188#define CELL_OCCUPIED(c) ((c)->key != INVALID_PTR)
189
190/* Clear the cell C, i.e. mark it as empty (unoccupied). */
191#define CLEAR_CELL(c) ((c)->key = INVALID_PTR)
192
193/* "Next" cell is the cell following C, but wrapping back to CELLS
194   when C would reach CELLS+SIZE.  */
195#define NEXT_CELL(c, cells, size) (c != cells + (size - 1) ? c + 1 : cells)
196
197/* Loop over occupied cells starting at C, terminating the loop when
198   an empty cell is encountered.  */
199#define FOREACH_OCCUPIED_ADJACENT(c, cells, size)                               \
200  for (; CELL_OCCUPIED (c); c = NEXT_CELL (c, cells, size))
201
202/* Return the position of KEY in hash table SIZE large, hash function
203   being HASHFUN.  */
204#define HASH_POSITION(key, hashfun, size) ((hashfun) (key) % size)
205
206/* Find a prime near, but greather than or equal to SIZE.  The primes
207   are looked up from a table with a selection of primes convenient
208   for this purpose.
209
210   PRIME_OFFSET is a minor optimization: it specifies start position
211   for the search for the large enough prime.  The final offset is
212   stored in the same variable.  That way the list of primes does not
213   have to be scanned from the beginning each time around.  */
214
215static int
216prime_size (int size, int *prime_offset)
217{
218  static const int primes[] = {
219    13, 19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031,
220    1361, 1777, 2333, 3037, 3967, 5167, 6719, 8737, 11369, 14783,
221    19219, 24989, 32491, 42257, 54941, 71429, 92861, 120721, 156941,
222    204047, 265271, 344857, 448321, 582821, 757693, 985003, 1280519,
223    1664681, 2164111, 2813353, 3657361, 4754591, 6180989, 8035301,
224    10445899, 13579681, 17653589, 22949669, 29834603, 38784989,
225    50420551, 65546729, 85210757, 110774011, 144006217, 187208107,
226    243370577, 316381771, 411296309, 534685237, 695090819, 903618083,
227    1174703521, 1527114613, 1837299131, 2147483647
228  };
229  size_t i;
230
231  for (i = *prime_offset; i < countof (primes); i++)
232    if (primes[i] >= size)
233      {
234        /* Set the offset to the next prime.  That is safe because,
235           next time we are called, it will be with a larger SIZE,
236           which means we could never return the same prime anyway.
237           (If that is not the case, the caller can simply reset
238           *prime_offset.)  */
239        *prime_offset = i + 1;
240        return primes[i];
241      }
242
243  abort ();
244}
245
246static int cmp_pointer (const void *, const void *);
247
248/* Create a hash table with hash function HASH_FUNCTION and test
249   function TEST_FUNCTION.  The table is empty (its count is 0), but
250   pre-allocated to store at least ITEMS items.
251
252   ITEMS is the number of items that the table can accept without
253   needing to resize.  It is useful when creating a table that is to
254   be immediately filled with a known number of items.  In that case,
255   the regrows are a waste of time, and specifying ITEMS correctly
256   will avoid them altogether.
257
258   Note that hash tables grow dynamically regardless of ITEMS.  The
259   only use of ITEMS is to preallocate the table and avoid unnecessary
260   dynamic regrows.  Don't bother making ITEMS prime because it's not
261   used as size unchanged.  To start with a small table that grows as
262   needed, simply specify zero ITEMS.
263
264   If hash and test callbacks are not specified, identity mapping is
265   assumed, i.e. pointer values are used for key comparison.  (Common
266   Lisp calls such tables EQ hash tables, and Java calls them
267   IdentityHashMaps.)  If your keys require different comparison,
268   specify hash and test functions.  For easy use of C strings as hash
269   keys, you can use the convenience functions make_string_hash_table
270   and make_nocase_string_hash_table.  */
271
272struct hash_table *
273hash_table_new (int items,
274                unsigned long (*hash_function) (const void *),
275                int (*test_function) (const void *, const void *))
276{
277  int size;
278  struct hash_table *ht = xnew (struct hash_table);
279
280  ht->hash_function = hash_function ? hash_function : hash_pointer;
281  ht->test_function = test_function ? test_function : cmp_pointer;
282
283  /* If the size of struct hash_table ever becomes a concern, this
284     field can go.  (Wget doesn't create many hashes.)  */
285  ht->prime_offset = 0;
286
287  /* Calculate the size that ensures that the table will store at
288     least ITEMS keys without the need to resize.  */
289  size = 1 + items / HASH_MAX_FULLNESS;
290  size = prime_size (size, &ht->prime_offset);
291  ht->size = size;
292  ht->resize_threshold = size * HASH_MAX_FULLNESS;
293  /*assert (ht->resize_threshold >= items);*/
294
295  ht->cells = xnew_array (struct cell, ht->size);
296
297  /* Mark cells as empty.  We use 0xff rather than 0 to mark empty
298     keys because it allows us to use NULL/0 as keys.  */
299  memset (ht->cells, INVALID_PTR_CHAR, size * sizeof (struct cell));
300
301  ht->count = 0;
302
303  return ht;
304}
305
306/* Free the data associated with hash table HT. */
307
308void
309hash_table_destroy (struct hash_table *ht)
310{
311  xfree (ht->cells);
312  xfree (ht);
313}
314
315/* The heart of most functions in this file -- find the cell whose
316   KEY is equal to key, using linear probing.  Returns the cell
317   that matches KEY, or the first empty cell if none matches.  */
318
319static inline struct cell *
320find_cell (const struct hash_table *ht, const void *key)
321{
322  struct cell *cells = ht->cells;
323  int size = ht->size;
324  struct cell *c = cells + HASH_POSITION (key, ht->hash_function, size);
325  testfun_t equals = ht->test_function;
326
327  FOREACH_OCCUPIED_ADJACENT (c, cells, size)
328    if (equals (key, c->key))
329      break;
330  return c;
331}
332
333/* Get the value that corresponds to the key KEY in the hash table HT.
334   If no value is found, return NULL.  Note that NULL is a legal value
335   for value; if you are storing NULLs in your hash table, you can use
336   hash_table_contains to be sure that a (possibly NULL) value exists
337   in the table.  Or, you can use hash_table_get_pair instead of this
338   function.  */
339
340void *
341hash_table_get (const struct hash_table *ht, const void *key)
342{
343  struct cell *c = find_cell (ht, key);
344  if (CELL_OCCUPIED (c))
345    return c->value;
346  else
347    return NULL;
348}
349
350/* Like hash_table_get, but writes out the pointers to both key and
351   value.  Returns non-zero on success.  */
352
353int
354hash_table_get_pair (const struct hash_table *ht, const void *lookup_key,
355                     void *orig_key, void *value)
356{
357  struct cell *c = find_cell (ht, lookup_key);
358  if (CELL_OCCUPIED (c))
359    {
360      if (orig_key)
361        *(void **)orig_key = c->key;
362      if (value)
363        *(void **)value = c->value;
364      return 1;
365    }
366  else
367    return 0;
368}
369
370/* Return 1 if HT contains KEY, 0 otherwise. */
371
372int
373hash_table_contains (const struct hash_table *ht, const void *key)
374{
375  struct cell *c = find_cell (ht, key);
376  return CELL_OCCUPIED (c);
377}
378
379/* Grow hash table HT as necessary, and rehash all the key-value
380   mappings.  */
381
382static void
383grow_hash_table (struct hash_table *ht)
384{
385  hashfun_t hasher = ht->hash_function;
386  struct cell *old_cells = ht->cells;
387  struct cell *old_end   = ht->cells + ht->size;
388  struct cell *c, *cells;
389  int newsize;
390
391  newsize = prime_size (ht->size * HASH_RESIZE_FACTOR, &ht->prime_offset);
392#if 0
393  printf ("growing from %d to %d; fullness %.2f%% to %.2f%%\n",
394          ht->size, newsize,
395          100.0 * ht->count / ht->size,
396          100.0 * ht->count / newsize);
397#endif
398
399  ht->size = newsize;
400  ht->resize_threshold = newsize * HASH_MAX_FULLNESS;
401
402  cells = xnew_array (struct cell, newsize);
403  memset (cells, INVALID_PTR_CHAR, newsize * sizeof (struct cell));
404  ht->cells = cells;
405
406  for (c = old_cells; c < old_end; c++)
407    if (CELL_OCCUPIED (c))
408      {
409        struct cell *new_c;
410        /* We don't need to test for uniqueness of keys because they
411           come from the hash table and are therefore known to be
412           unique.  */
413        new_c = cells + HASH_POSITION (c->key, hasher, newsize);
414        FOREACH_OCCUPIED_ADJACENT (new_c, cells, newsize)
415          ;
416        *new_c = *c;
417      }
418
419  xfree (old_cells);
420}
421
422/* Put VALUE in the hash table HT under the key KEY.  This regrows the
423   table if necessary.  */
424
425void
426hash_table_put (struct hash_table *ht, const void *key, void *value)
427{
428  struct cell *c = find_cell (ht, key);
429  if (CELL_OCCUPIED (c))
430    {
431      /* update existing item */
432      c->key   = (void *)key; /* const? */
433      c->value = value;
434      return;
435    }
436
437  /* If adding the item would make the table exceed max. fullness,
438     grow the table first.  */
439  if (ht->count >= ht->resize_threshold)
440    {
441      grow_hash_table (ht);
442      c = find_cell (ht, key);
443    }
444
445  /* add new item */
446  ++ht->count;
447  c->key   = (void *)key;       /* const? */
448  c->value = value;
449}
450
451/* Remove KEY->value mapping from HT.  Return 0 if there was no such
452   entry; return 1 if an entry was removed.  */
453
454int
455hash_table_remove (struct hash_table *ht, const void *key)
456{
457  struct cell *c = find_cell (ht, key);
458  if (!CELL_OCCUPIED (c))
459    return 0;
460  else
461    {
462      int size = ht->size;
463      struct cell *cells = ht->cells;
464      hashfun_t hasher = ht->hash_function;
465
466      CLEAR_CELL (c);
467      --ht->count;
468
469      /* Rehash all the entries following C.  The alternative
470         approach is to mark the entry as deleted, i.e. create a
471         "tombstone".  That speeds up removal, but leaves a lot of
472         garbage and slows down hash_table_get and hash_table_put.  */
473
474      c = NEXT_CELL (c, cells, size);
475      FOREACH_OCCUPIED_ADJACENT (c, cells, size)
476        {
477          const void *key2 = c->key;
478          struct cell *c_new;
479
480          /* Find the new location for the key. */
481          c_new = cells + HASH_POSITION (key2, hasher, size);
482          FOREACH_OCCUPIED_ADJACENT (c_new, cells, size)
483            if (key2 == c_new->key)
484              /* The cell C (key2) is already where we want it (in
485                 C_NEW's "chain" of keys.)  */
486              goto next_rehash;
487
488          *c_new = *c;
489          CLEAR_CELL (c);
490
491        next_rehash:
492          ;
493        }
494      return 1;
495    }
496}
497
498/* Clear HT of all entries.  After calling this function, the count
499   and the fullness of the hash table will be zero.  The size will
500   remain unchanged.  */
501
502void
503hash_table_clear (struct hash_table *ht)
504{
505  memset (ht->cells, INVALID_PTR_CHAR, ht->size * sizeof (struct cell));
506  ht->count = 0;
507}
508
509/* Call FN for each entry in HT.  FN is called with three arguments:
510   the key, the value, and ARG.  When FN returns a non-zero value, the
511   mapping stops.
512
513   It is undefined what happens if you add or remove entries in the
514   hash table while hash_table_for_each is running.  The exception is
515   the entry you're currently mapping over; you may call
516   hash_table_put or hash_table_remove on that entry's key.  That is
517   also the reason why this function cannot be implemented in terms of
518   hash_table_iterate.  */
519
520void
521hash_table_for_each (struct hash_table *ht,
522                     int (*fn) (void *, void *, void *), void *arg)
523{
524  struct cell *c = ht->cells;
525  struct cell *end = ht->cells + ht->size;
526
527  for (; c < end; c++)
528    if (CELL_OCCUPIED (c))
529      {
530        void *key;
531      repeat:
532        key = c->key;
533        if (fn (key, c->value, arg))
534          return;
535        /* hash_table_remove might have moved the adjacent cells. */
536        if (c->key != key && CELL_OCCUPIED (c))
537          goto repeat;
538      }
539}
540
541/* Initiate iteration over HT.  Entries are obtained with
542   hash_table_iter_next, a typical iteration loop looking like this:
543
544       hash_table_iterator iter;
545       for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
546         ... do something with iter.key and iter.value ...
547
548   The iterator does not need to be deallocated after use.  The hash
549   table must not be modified while being iterated over.  */
550
551void
552hash_table_iterate (struct hash_table *ht, hash_table_iterator *iter)
553{
554  iter->pos = ht->cells;
555  iter->end = ht->cells + ht->size;
556}
557
558/* Get the next hash table entry.  ITER is an iterator object
559   initialized using hash_table_iterate.  While there are more
560   entries, the key and value pointers are stored to ITER->key and
561   ITER->value respectively and 1 is returned.  When there are no more
562   entries, 0 is returned.
563
564   If the hash table is modified between calls to this function, the
565   result is undefined.  */
566
567int
568hash_table_iter_next (hash_table_iterator *iter)
569{
570  struct cell *c = iter->pos;
571  struct cell *end = iter->end;
572  for (; c < end; c++)
573    if (CELL_OCCUPIED (c))
574      {
575        iter->key = c->key;
576        iter->value = c->value;
577        iter->pos = c + 1;
578        return 1;
579      }
580  return 0;
581}
582
583/* Return the number of elements in the hash table.  This is not the
584   same as the physical size of the hash table, which is always
585   greater than the number of elements.  */
586
587int
588hash_table_count (const struct hash_table *ht)
589{
590  return ht->count;
591}
592
593/* Functions from this point onward are meant for convenience and
594   don't strictly belong to this file.  However, this is as good a
595   place for them as any.  */
596
597/* Guidelines for creating custom hash and test functions:
598
599   - The test function returns non-zero for keys that are considered
600     "equal", zero otherwise.
601
602   - The hash function returns a number that represents the
603     "distinctness" of the object.  In more precise terms, it means
604     that for any two objects that test "equal" under the test
605     function, the hash function MUST produce the same result.
606
607     This does not mean that all different objects must produce
608     different values (that would be "perfect" hashing), only that
609     non-distinct objects must produce the same values!  For instance,
610     a hash function that returns 0 for any given object is a
611     perfectly valid (albeit extremely bad) hash function.  A hash
612     function that hashes a string by adding up all its characters is
613     another example of a valid (but still quite bad) hash function.
614
615     It is not hard to make hash and test functions agree about
616     equality.  For example, if the test function compares strings
617     case-insensitively, the hash function can lower-case the
618     characters when calculating the hash value.  That ensures that
619     two strings differing only in case will hash the same.
620
621   - To prevent performance degradation, choose a hash function with
622     as good "spreading" as possible.  A good hash function will use
623     all the bits of the input when calculating the hash, and will
624     react to even small changes in input with a completely different
625     output.  But don't make the hash function itself overly slow,
626     because you'll be incurring a non-negligible overhead to all hash
627     table operations.  */
628
629/*
630 * Support for hash tables whose keys are strings.
631 *
632 */
633
634/* Base 31 hash function.  Taken from Gnome's glib, modified to use
635   standard C types.
636
637   We used to use the popular hash function from the Dragon Book, but
638   this one seems to perform much better, both by being faster and by
639   generating less collisions.  */
640
641static unsigned long
642hash_string (const void *key)
643{
644  const char *p = key;
645  unsigned int h = *p;
646
647  if (h)
648    for (p += 1; *p != '\0'; p++)
649      h = (h << 5) - h + *p;
650
651  return h;
652}
653
654/* Frontend for strcmp usable for hash tables. */
655
656static int
657cmp_string (const void *s1, const void *s2)
658{
659  return !strcmp ((const char *)s1, (const char *)s2);
660}
661
662/* Return a hash table of preallocated to store at least ITEMS items
663   suitable to use strings as keys.  */
664
665struct hash_table *
666make_string_hash_table (int items)
667{
668  return hash_table_new (items, hash_string, cmp_string);
669}
670
671/*
672 * Support for hash tables whose keys are strings, but which are
673 * compared case-insensitively.
674 *
675 */
676
677/* Like hash_string, but produce the same hash regardless of the case. */
678
679static unsigned long
680hash_string_nocase (const void *key)
681{
682  const char *p = key;
683  unsigned int h = c_tolower (*p);
684
685  if (h)
686    for (p += 1; *p != '\0'; p++)
687      h = (h << 5) - h + c_tolower (*p);
688
689  return h;
690}
691
692/* Like string_cmp, but doing case-insensitive compareison. */
693
694static int
695string_cmp_nocase (const void *s1, const void *s2)
696{
697  return !strcasecmp ((const char *)s1, (const char *)s2);
698}
699
700/* Like make_string_hash_table, but uses string_hash_nocase and
701   string_cmp_nocase.  */
702
703struct hash_table *
704make_nocase_string_hash_table (int items)
705{
706  return hash_table_new (items, hash_string_nocase, string_cmp_nocase);
707}
708
709/* Hashing of numeric values, such as pointers and integers.
710
711   This implementation is the Robert Jenkins' 32 bit Mix Function,
712   with a simple adaptation for 64-bit values.  According to Jenkins
713   it should offer excellent spreading of values.  Unlike the popular
714   Knuth's multiplication hash, this function doesn't need to know the
715   hash table size to work.  */
716
717unsigned long
718hash_pointer (const void *ptr)
719{
720  uintptr_t key = (uintptr_t) ptr;
721  key += (key << 12);
722  key ^= (key >> 22);
723  key += (key << 4);
724  key ^= (key >> 9);
725  key += (key << 10);
726  key ^= (key >> 2);
727  key += (key << 7);
728  key ^= (key >> 12);
729#if SIZEOF_VOID_P > 4
730  key += (key << 44);
731  key ^= (key >> 54);
732  key += (key << 36);
733  key ^= (key >> 41);
734  key += (key << 42);
735  key ^= (key >> 34);
736  key += (key << 39);
737  key ^= (key >> 44);
738#endif
739  return (unsigned long) key;
740}
741
742static int
743cmp_pointer (const void *ptr1, const void *ptr2)
744{
745  return ptr1 == ptr2;
746}
747
748#ifdef TEST
749
750#include <stdio.h>
751#include <string.h>
752
753void
754print_hash (struct hash_table *sht)
755{
756  hash_table_iterator iter;
757  int count = 0;
758
759  for (hash_table_iterate (sht, &iter); hash_table_iter_next (&iter);
760       ++count)
761    printf ("%s: %s\n", iter.key, iter.value);
762  assert (count == sht->count);
763}
764
765int
766main (void)
767{
768  struct hash_table *ht = make_string_hash_table (0);
769  char line[80];
770  while ((fgets (line, sizeof (line), stdin)))
771    {
772      int len = strlen (line);
773      if (len <= 1)
774        continue;
775      line[--len] = '\0';
776      if (!hash_table_contains (ht, line))
777        hash_table_put (ht, strdup (line), "here I am!");
778#if 1
779      if (len % 5 == 0)
780        {
781          char *line_copy;
782          if (hash_table_get_pair (ht, line, &line_copy, NULL))
783            {
784              hash_table_remove (ht, line);
785              xfree (line_copy);
786            }
787        }
788#endif
789    }
790#if 0
791  print_hash (ht);
792#endif
793#if 1
794  printf ("%d %d\n", ht->count, ht->size);
795#endif
796  return 0;
797}
798#endif /* TEST */
799