1169695Skan/* Hash tables.
2169695Skan   Copyright (C) 2000, 2001, 2003, 2004 Free Software Foundation, Inc.
3169695Skan
4169695SkanThis program is free software; you can redistribute it and/or modify it
5169695Skanunder the terms of the GNU General Public License as published by the
6169695SkanFree Software Foundation; either version 2, or (at your option) any
7169695Skanlater version.
8169695Skan
9169695SkanThis program is distributed in the hope that it will be useful,
10169695Skanbut WITHOUT ANY WARRANTY; without even the implied warranty of
11169695SkanMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12169695SkanGNU General Public License for more details.
13169695Skan
14169695SkanYou should have received a copy of the GNU General Public License
15169695Skanalong with this program; if not, write to the Free Software
16169695SkanFoundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17169695Skan
18169695Skan In other words, you are welcome to use, share and improve this program.
19169695Skan You are forbidden to forbid anyone else to use, share and improve
20169695Skan what you give them.   Help stamp out software-hoarding!  */
21169695Skan
22169695Skan#include "config.h"
23169695Skan#include "system.h"
24169695Skan#include "symtab.h"
25169695Skan
26169695Skan/* The code below is a specialization of Vladimir Makarov's expandable
27169695Skan   hash tables (see libiberty/hashtab.c).  The abstraction penalty was
28169695Skan   too high to continue using the generic form.  This code knows
29169695Skan   intrinsically how to calculate a hash value, and how to compare an
30169695Skan   existing entry with a potential new one.  Also, the ability to
31169695Skan   delete members from the table has been removed.  */
32169695Skan
33169695Skanstatic unsigned int calc_hash (const unsigned char *, size_t);
34169695Skanstatic void ht_expand (hash_table *);
35169695Skanstatic double approx_sqrt (double);
36169695Skan
37169695Skan/* Calculate the hash of the string STR of length LEN.  */
38169695Skan
39169695Skanstatic unsigned int
40169695Skancalc_hash (const unsigned char *str, size_t len)
41169695Skan{
42169695Skan  size_t n = len;
43169695Skan  unsigned int r = 0;
44169695Skan
45169695Skan  while (n--)
46169695Skan    r = HT_HASHSTEP (r, *str++);
47169695Skan
48169695Skan  return HT_HASHFINISH (r, len);
49169695Skan}
50169695Skan
51169695Skan/* Initialize an identifier hashtable.  */
52169695Skan
53169695Skanhash_table *
54169695Skanht_create (unsigned int order)
55169695Skan{
56169695Skan  unsigned int nslots = 1 << order;
57169695Skan  hash_table *table;
58169695Skan
59169695Skan  table = XCNEW (hash_table);
60169695Skan
61169695Skan  /* Strings need no alignment.  */
62169695Skan  _obstack_begin (&table->stack, 0, 0,
63169695Skan		  (void *(*) (long)) xmalloc,
64169695Skan		  (void (*) (void *)) free);
65169695Skan
66169695Skan  obstack_alignment_mask (&table->stack) = 0;
67169695Skan
68169695Skan  table->entries = XCNEWVEC (hashnode, nslots);
69169695Skan  table->entries_owned = true;
70169695Skan  table->nslots = nslots;
71169695Skan  return table;
72169695Skan}
73169695Skan
74169695Skan/* Frees all memory associated with a hash table.  */
75169695Skan
76169695Skanvoid
77169695Skanht_destroy (hash_table *table)
78169695Skan{
79169695Skan  obstack_free (&table->stack, NULL);
80169695Skan  if (table->entries_owned)
81169695Skan    free (table->entries);
82169695Skan  free (table);
83169695Skan}
84169695Skan
85169695Skan/* Returns the hash entry for the a STR of length LEN.  If that string
86169695Skan   already exists in the table, returns the existing entry, and, if
87169695Skan   INSERT is CPP_ALLOCED, frees the last obstack object.  If the
88169695Skan   identifier hasn't been seen before, and INSERT is CPP_NO_INSERT,
89169695Skan   returns NULL.  Otherwise insert and returns a new entry.  A new
90169695Skan   string is alloced if INSERT is CPP_ALLOC, otherwise INSERT is
91169695Skan   CPP_ALLOCED and the item is assumed to be at the top of the
92169695Skan   obstack.  */
93169695Skanhashnode
94169695Skanht_lookup (hash_table *table, const unsigned char *str, size_t len,
95169695Skan	   enum ht_lookup_option insert)
96169695Skan{
97169695Skan  return ht_lookup_with_hash (table, str, len, calc_hash (str, len),
98169695Skan			      insert);
99169695Skan}
100169695Skan
101169695Skanhashnode
102169695Skanht_lookup_with_hash (hash_table *table, const unsigned char *str,
103169695Skan		     size_t len, unsigned int hash,
104169695Skan		     enum ht_lookup_option insert)
105169695Skan{
106169695Skan  unsigned int hash2;
107169695Skan  unsigned int index;
108169695Skan  size_t sizemask;
109169695Skan  hashnode node;
110169695Skan
111169695Skan  sizemask = table->nslots - 1;
112169695Skan  index = hash & sizemask;
113169695Skan  table->searches++;
114169695Skan
115169695Skan  node = table->entries[index];
116169695Skan
117169695Skan  if (node != NULL)
118169695Skan    {
119169695Skan      if (node->hash_value == hash
120169695Skan	  && HT_LEN (node) == (unsigned int) len
121169695Skan	  && !memcmp (HT_STR (node), str, len))
122169695Skan	{
123169695Skan	  if (insert == HT_ALLOCED)
124169695Skan	    /* The string we search for was placed at the end of the
125169695Skan	       obstack.  Release it.  */
126169695Skan	    obstack_free (&table->stack, (void *) str);
127169695Skan	  return node;
128169695Skan	}
129169695Skan
130169695Skan      /* hash2 must be odd, so we're guaranteed to visit every possible
131169695Skan	 location in the table during rehashing.  */
132169695Skan      hash2 = ((hash * 17) & sizemask) | 1;
133169695Skan
134169695Skan      for (;;)
135169695Skan	{
136169695Skan	  table->collisions++;
137169695Skan	  index = (index + hash2) & sizemask;
138169695Skan	  node = table->entries[index];
139169695Skan	  if (node == NULL)
140169695Skan	    break;
141169695Skan
142169695Skan	  if (node->hash_value == hash
143169695Skan	      && HT_LEN (node) == (unsigned int) len
144169695Skan	      && !memcmp (HT_STR (node), str, len))
145169695Skan	    {
146169695Skan	      if (insert == HT_ALLOCED)
147169695Skan	      /* The string we search for was placed at the end of the
148169695Skan		 obstack.  Release it.  */
149169695Skan		obstack_free (&table->stack, (void *) str);
150169695Skan	      return node;
151169695Skan	    }
152169695Skan	}
153169695Skan    }
154169695Skan
155169695Skan  if (insert == HT_NO_INSERT)
156169695Skan    return NULL;
157169695Skan
158169695Skan  node = (*table->alloc_node) (table);
159169695Skan  table->entries[index] = node;
160169695Skan
161169695Skan  HT_LEN (node) = (unsigned int) len;
162169695Skan  node->hash_value = hash;
163169695Skan  if (insert == HT_ALLOC)
164169695Skan    HT_STR (node) = (const unsigned char *) obstack_copy0 (&table->stack,
165169695Skan                                                           str, len);
166169695Skan  else
167169695Skan    HT_STR (node) = str;
168169695Skan
169169695Skan  if (++table->nelements * 4 >= table->nslots * 3)
170169695Skan    /* Must expand the string table.  */
171169695Skan    ht_expand (table);
172169695Skan
173169695Skan  return node;
174169695Skan}
175169695Skan
176169695Skan/* Double the size of a hash table, re-hashing existing entries.  */
177169695Skan
178169695Skanstatic void
179169695Skanht_expand (hash_table *table)
180169695Skan{
181169695Skan  hashnode *nentries, *p, *limit;
182169695Skan  unsigned int size, sizemask;
183169695Skan
184169695Skan  size = table->nslots * 2;
185169695Skan  nentries = XCNEWVEC (hashnode, size);
186169695Skan  sizemask = size - 1;
187169695Skan
188169695Skan  p = table->entries;
189169695Skan  limit = p + table->nslots;
190169695Skan  do
191169695Skan    if (*p)
192169695Skan      {
193169695Skan	unsigned int index, hash, hash2;
194169695Skan
195169695Skan	hash = (*p)->hash_value;
196169695Skan	index = hash & sizemask;
197169695Skan
198169695Skan	if (nentries[index])
199169695Skan	  {
200169695Skan	    hash2 = ((hash * 17) & sizemask) | 1;
201169695Skan	    do
202169695Skan	      {
203169695Skan		index = (index + hash2) & sizemask;
204169695Skan	      }
205169695Skan	    while (nentries[index]);
206169695Skan	  }
207169695Skan	nentries[index] = *p;
208169695Skan      }
209169695Skan  while (++p < limit);
210169695Skan
211169695Skan  if (table->entries_owned)
212169695Skan    free (table->entries);
213169695Skan  table->entries_owned = true;
214169695Skan  table->entries = nentries;
215169695Skan  table->nslots = size;
216169695Skan}
217169695Skan
218169695Skan/* For all nodes in TABLE, callback CB with parameters TABLE->PFILE,
219169695Skan   the node, and V.  */
220169695Skanvoid
221169695Skanht_forall (hash_table *table, ht_cb cb, const void *v)
222169695Skan{
223169695Skan  hashnode *p, *limit;
224169695Skan
225169695Skan  p = table->entries;
226169695Skan  limit = p + table->nslots;
227169695Skan  do
228169695Skan    if (*p)
229169695Skan      {
230169695Skan	if ((*cb) (table->pfile, *p, v) == 0)
231169695Skan	  break;
232169695Skan      }
233169695Skan  while (++p < limit);
234169695Skan}
235169695Skan
236169695Skan/* Restore the hash table.  */
237169695Skanvoid
238169695Skanht_load (hash_table *ht, hashnode *entries,
239169695Skan	 unsigned int nslots, unsigned int nelements,
240169695Skan	 bool own)
241169695Skan{
242169695Skan  if (ht->entries_owned)
243169695Skan    free (ht->entries);
244169695Skan  ht->entries = entries;
245169695Skan  ht->nslots = nslots;
246169695Skan  ht->nelements = nelements;
247169695Skan  ht->entries_owned = own;
248169695Skan}
249169695Skan
250169695Skan/* Dump allocation statistics to stderr.  */
251169695Skan
252169695Skanvoid
253169695Skanht_dump_statistics (hash_table *table)
254169695Skan{
255169695Skan  size_t nelts, nids, overhead, headers;
256169695Skan  size_t total_bytes, longest;
257169695Skan  double sum_of_squares, exp_len, exp_len2, exp2_len;
258169695Skan  hashnode *p, *limit;
259169695Skan
260169695Skan#define SCALE(x) ((unsigned long) ((x) < 1024*10 \
261169695Skan		  ? (x) \
262169695Skan		  : ((x) < 1024*1024*10 \
263169695Skan		     ? (x) / 1024 \
264169695Skan		     : (x) / (1024*1024))))
265169695Skan#define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
266169695Skan
267169695Skan  total_bytes = longest = sum_of_squares = nids = 0;
268169695Skan  p = table->entries;
269169695Skan  limit = p + table->nslots;
270169695Skan  do
271169695Skan    if (*p)
272169695Skan      {
273169695Skan	size_t n = HT_LEN (*p);
274169695Skan
275169695Skan	total_bytes += n;
276169695Skan	sum_of_squares += (double) n * n;
277169695Skan	if (n > longest)
278169695Skan	  longest = n;
279169695Skan	nids++;
280169695Skan      }
281169695Skan  while (++p < limit);
282169695Skan
283169695Skan  nelts = table->nelements;
284169695Skan  overhead = obstack_memory_used (&table->stack) - total_bytes;
285169695Skan  headers = table->nslots * sizeof (hashnode);
286169695Skan
287169695Skan  fprintf (stderr, "\nString pool\nentries\t\t%lu\n",
288169695Skan	   (unsigned long) nelts);
289169695Skan  fprintf (stderr, "identifiers\t%lu (%.2f%%)\n",
290169695Skan	   (unsigned long) nids, nids * 100.0 / nelts);
291169695Skan  fprintf (stderr, "slots\t\t%lu\n",
292169695Skan	   (unsigned long) table->nslots);
293169695Skan  fprintf (stderr, "bytes\t\t%lu%c (%lu%c overhead)\n",
294169695Skan	   SCALE (total_bytes), LABEL (total_bytes),
295169695Skan	   SCALE (overhead), LABEL (overhead));
296169695Skan  fprintf (stderr, "table size\t%lu%c\n",
297169695Skan	   SCALE (headers), LABEL (headers));
298169695Skan
299169695Skan  exp_len = (double)total_bytes / (double)nelts;
300169695Skan  exp2_len = exp_len * exp_len;
301169695Skan  exp_len2 = (double) sum_of_squares / (double) nelts;
302169695Skan
303169695Skan  fprintf (stderr, "coll/search\t%.4f\n",
304169695Skan	   (double) table->collisions / (double) table->searches);
305169695Skan  fprintf (stderr, "ins/search\t%.4f\n",
306169695Skan	   (double) nelts / (double) table->searches);
307169695Skan  fprintf (stderr, "avg. entry\t%.2f bytes (+/- %.2f)\n",
308169695Skan	   exp_len, approx_sqrt (exp_len2 - exp2_len));
309169695Skan  fprintf (stderr, "longest entry\t%lu\n",
310169695Skan	   (unsigned long) longest);
311169695Skan#undef SCALE
312169695Skan#undef LABEL
313169695Skan}
314169695Skan
315169695Skan/* Return the approximate positive square root of a number N.  This is for
316169695Skan   statistical reports, not code generation.  */
317169695Skanstatic double
318169695Skanapprox_sqrt (double x)
319169695Skan{
320169695Skan  double s, d;
321169695Skan
322169695Skan  if (x < 0)
323169695Skan    abort ();
324169695Skan  if (x == 0)
325169695Skan    return 0;
326169695Skan
327169695Skan  s = x;
328169695Skan  do
329169695Skan    {
330169695Skan      d = (s * s - x) / (2 * s);
331169695Skan      s -= d;
332169695Skan    }
333169695Skan  while (d > .0001);
334169695Skan  return s;
335169695Skan}
336