1/* Hash tables.
2   Copyright (C) 2000-2015 Free Software Foundation, Inc.
3
4This program is free software; you can redistribute it and/or modify it
5under the terms of the GNU General Public License as published by the
6Free Software Foundation; either version 3, or (at your option) any
7later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; see the file COPYING3.  If not see
16<http://www.gnu.org/licenses/>.
17
18 In other words, you are welcome to use, share and improve this program.
19 You are forbidden to forbid anyone else to use, share and improve
20 what you give them.   Help stamp out software-hoarding!  */
21
22#include "config.h"
23#include "system.h"
24#include "symtab.h"
25
26/* The code below is a specialization of Vladimir Makarov's expandable
27   hash tables (see libiberty/hashtab.c).  The abstraction penalty was
28   too high to continue using the generic form.  This code knows
29   intrinsically how to calculate a hash value, and how to compare an
30   existing entry with a potential new one.  */
31
32static unsigned int calc_hash (const unsigned char *, size_t);
33static void ht_expand (cpp_hash_table *);
34static double approx_sqrt (double);
35
36/* A deleted entry.  */
37#define DELETED ((hashnode) -1)
38
39/* Calculate the hash of the string STR of length LEN.  */
40
41static unsigned int
42calc_hash (const unsigned char *str, size_t len)
43{
44  size_t n = len;
45  unsigned int r = 0;
46
47  while (n--)
48    r = HT_HASHSTEP (r, *str++);
49
50  return HT_HASHFINISH (r, len);
51}
52
53/* Initialize an identifier hashtable.  */
54
55cpp_hash_table *
56ht_create (unsigned int order)
57{
58  unsigned int nslots = 1 << order;
59  cpp_hash_table *table;
60
61  table = XCNEW (cpp_hash_table);
62
63  /* Strings need no alignment.  */
64  obstack_specify_allocation (&table->stack, 0, 0, xmalloc, free);
65
66  obstack_alignment_mask (&table->stack) = 0;
67
68  table->entries = XCNEWVEC (hashnode, nslots);
69  table->entries_owned = true;
70  table->nslots = nslots;
71  return table;
72}
73
74/* Frees all memory associated with a hash table.  */
75
76void
77ht_destroy (cpp_hash_table *table)
78{
79  obstack_free (&table->stack, NULL);
80  if (table->entries_owned)
81    free (table->entries);
82  free (table);
83}
84
85/* Returns the hash entry for the a STR of length LEN.  If that string
86   already exists in the table, returns the existing entry.  If the
87   identifier hasn't been seen before, and INSERT is CPP_NO_INSERT,
88   returns NULL.  Otherwise insert and returns a new entry.  A new
89   string is allocated.  */
90hashnode
91ht_lookup (cpp_hash_table *table, const unsigned char *str, size_t len,
92	   enum ht_lookup_option insert)
93{
94  return ht_lookup_with_hash (table, str, len, calc_hash (str, len),
95			      insert);
96}
97
98hashnode
99ht_lookup_with_hash (cpp_hash_table *table, const unsigned char *str,
100		     size_t len, unsigned int hash,
101		     enum ht_lookup_option insert)
102{
103  unsigned int hash2;
104  unsigned int index;
105  unsigned int deleted_index = table->nslots;
106  size_t sizemask;
107  hashnode node;
108
109  sizemask = table->nslots - 1;
110  index = hash & sizemask;
111  table->searches++;
112
113  node = table->entries[index];
114
115  if (node != NULL)
116    {
117      if (node == DELETED)
118	deleted_index = index;
119      else if (node->hash_value == hash
120	       && HT_LEN (node) == (unsigned int) len
121	       && !memcmp (HT_STR (node), str, len))
122	return node;
123
124      /* hash2 must be odd, so we're guaranteed to visit every possible
125	 location in the table during rehashing.  */
126      hash2 = ((hash * 17) & sizemask) | 1;
127
128      for (;;)
129	{
130	  table->collisions++;
131	  index = (index + hash2) & sizemask;
132	  node = table->entries[index];
133	  if (node == NULL)
134	    break;
135
136	  if (node == DELETED)
137	    {
138	      if (deleted_index != table->nslots)
139		deleted_index = index;
140	    }
141	  else if (node->hash_value == hash
142		   && HT_LEN (node) == (unsigned int) len
143		   && !memcmp (HT_STR (node), str, len))
144	    return node;
145	}
146    }
147
148  if (insert == HT_NO_INSERT)
149    return NULL;
150
151  /* We prefer to overwrite the first deleted slot we saw.  */
152  if (deleted_index != table->nslots)
153    index = deleted_index;
154
155  node = (*table->alloc_node) (table);
156  table->entries[index] = node;
157
158  HT_LEN (node) = (unsigned int) len;
159  node->hash_value = hash;
160
161  if (table->alloc_subobject)
162    {
163      char *chars = (char *) table->alloc_subobject (len + 1);
164      memcpy (chars, str, len);
165      chars[len] = '\0';
166      HT_STR (node) = (const unsigned char *) chars;
167    }
168  else
169    HT_STR (node) = (const unsigned char *) obstack_copy0 (&table->stack,
170							   str, len);
171
172  if (++table->nelements * 4 >= table->nslots * 3)
173    /* Must expand the string table.  */
174    ht_expand (table);
175
176  return node;
177}
178
179/* Double the size of a hash table, re-hashing existing entries.  */
180
181static void
182ht_expand (cpp_hash_table *table)
183{
184  hashnode *nentries, *p, *limit;
185  unsigned int size, sizemask;
186
187  size = table->nslots * 2;
188  nentries = XCNEWVEC (hashnode, size);
189  sizemask = size - 1;
190
191  p = table->entries;
192  limit = p + table->nslots;
193  do
194    if (*p && *p != DELETED)
195      {
196	unsigned int index, hash, hash2;
197
198	hash = (*p)->hash_value;
199	index = hash & sizemask;
200
201	if (nentries[index])
202	  {
203	    hash2 = ((hash * 17) & sizemask) | 1;
204	    do
205	      {
206		index = (index + hash2) & sizemask;
207	      }
208	    while (nentries[index]);
209	  }
210	nentries[index] = *p;
211      }
212  while (++p < limit);
213
214  if (table->entries_owned)
215    free (table->entries);
216  table->entries_owned = true;
217  table->entries = nentries;
218  table->nslots = size;
219}
220
221/* For all nodes in TABLE, callback CB with parameters TABLE->PFILE,
222   the node, and V.  */
223void
224ht_forall (cpp_hash_table *table, ht_cb cb, const void *v)
225{
226  hashnode *p, *limit;
227
228  p = table->entries;
229  limit = p + table->nslots;
230  do
231    if (*p && *p != DELETED)
232      {
233	if ((*cb) (table->pfile, *p, v) == 0)
234	  break;
235      }
236  while (++p < limit);
237}
238
239/* Like ht_forall, but a nonzero return from the callback means that
240   the entry should be removed from the table.  */
241void
242ht_purge (cpp_hash_table *table, ht_cb cb, const void *v)
243{
244  hashnode *p, *limit;
245
246  p = table->entries;
247  limit = p + table->nslots;
248  do
249    if (*p && *p != DELETED)
250      {
251	if ((*cb) (table->pfile, *p, v))
252	  *p = DELETED;
253      }
254  while (++p < limit);
255}
256
257/* Restore the hash table.  */
258void
259ht_load (cpp_hash_table *ht, hashnode *entries,
260	 unsigned int nslots, unsigned int nelements,
261	 bool own)
262{
263  if (ht->entries_owned)
264    free (ht->entries);
265  ht->entries = entries;
266  ht->nslots = nslots;
267  ht->nelements = nelements;
268  ht->entries_owned = own;
269}
270
271/* Dump allocation statistics to stderr.  */
272
273void
274ht_dump_statistics (cpp_hash_table *table)
275{
276  size_t nelts, nids, overhead, headers;
277  size_t total_bytes, longest, deleted = 0;
278  double sum_of_squares, exp_len, exp_len2, exp2_len;
279  hashnode *p, *limit;
280
281#define SCALE(x) ((unsigned long) ((x) < 1024*10 \
282		  ? (x) \
283		  : ((x) < 1024*1024*10 \
284		     ? (x) / 1024 \
285		     : (x) / (1024*1024))))
286#define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
287
288  total_bytes = longest = sum_of_squares = nids = 0;
289  p = table->entries;
290  limit = p + table->nslots;
291  do
292    if (*p == DELETED)
293      ++deleted;
294    else if (*p)
295      {
296	size_t n = HT_LEN (*p);
297
298	total_bytes += n;
299	sum_of_squares += (double) n * n;
300	if (n > longest)
301	  longest = n;
302	nids++;
303      }
304  while (++p < limit);
305
306  nelts = table->nelements;
307  overhead = obstack_memory_used (&table->stack) - total_bytes;
308  headers = table->nslots * sizeof (hashnode);
309
310  fprintf (stderr, "\nString pool\nentries\t\t%lu\n",
311	   (unsigned long) nelts);
312  fprintf (stderr, "identifiers\t%lu (%.2f%%)\n",
313	   (unsigned long) nids, nids * 100.0 / nelts);
314  fprintf (stderr, "slots\t\t%lu\n",
315	   (unsigned long) table->nslots);
316  fprintf (stderr, "deleted\t\t%lu\n",
317	   (unsigned long) deleted);
318  fprintf (stderr, "bytes\t\t%lu%c (%lu%c overhead)\n",
319	   SCALE (total_bytes), LABEL (total_bytes),
320	   SCALE (overhead), LABEL (overhead));
321  fprintf (stderr, "table size\t%lu%c\n",
322	   SCALE (headers), LABEL (headers));
323
324  exp_len = (double)total_bytes / (double)nelts;
325  exp2_len = exp_len * exp_len;
326  exp_len2 = (double) sum_of_squares / (double) nelts;
327
328  fprintf (stderr, "coll/search\t%.4f\n",
329	   (double) table->collisions / (double) table->searches);
330  fprintf (stderr, "ins/search\t%.4f\n",
331	   (double) nelts / (double) table->searches);
332  fprintf (stderr, "avg. entry\t%.2f bytes (+/- %.2f)\n",
333	   exp_len, approx_sqrt (exp_len2 - exp2_len));
334  fprintf (stderr, "longest entry\t%lu\n",
335	   (unsigned long) longest);
336#undef SCALE
337#undef LABEL
338}
339
340/* Return the approximate positive square root of a number N.  This is for
341   statistical reports, not code generation.  */
342static double
343approx_sqrt (double x)
344{
345  double s, d;
346
347  if (x < 0)
348    abort ();
349  if (x == 0)
350    return 0;
351
352  s = x;
353  do
354    {
355      d = (s * s - x) / (2 * s);
356      s -= d;
357    }
358  while (d > .0001);
359  return s;
360}
361