1169695Skan/* Hash tables. 2169695Skan Copyright (C) 2000, 2001, 2003, 2004 Free Software Foundation, Inc. 3169695Skan 4169695SkanThis program is free software; you can redistribute it and/or modify it 5169695Skanunder the terms of the GNU General Public License as published by the 6169695SkanFree Software Foundation; either version 2, or (at your option) any 7169695Skanlater version. 8169695Skan 9169695SkanThis program is distributed in the hope that it will be useful, 10169695Skanbut WITHOUT ANY WARRANTY; without even the implied warranty of 11169695SkanMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12169695SkanGNU General Public License for more details. 13169695Skan 14169695SkanYou should have received a copy of the GNU General Public License 15169695Skanalong with this program; if not, write to the Free Software 16169695SkanFoundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17169695Skan 18169695Skan In other words, you are welcome to use, share and improve this program. 19169695Skan You are forbidden to forbid anyone else to use, share and improve 20169695Skan what you give them. Help stamp out software-hoarding! */ 21169695Skan 22169695Skan#include "config.h" 23169695Skan#include "system.h" 24169695Skan#include "symtab.h" 25169695Skan 26169695Skan/* The code below is a specialization of Vladimir Makarov's expandable 27169695Skan hash tables (see libiberty/hashtab.c). The abstraction penalty was 28169695Skan too high to continue using the generic form. This code knows 29169695Skan intrinsically how to calculate a hash value, and how to compare an 30169695Skan existing entry with a potential new one. Also, the ability to 31169695Skan delete members from the table has been removed. */ 32169695Skan 33169695Skanstatic unsigned int calc_hash (const unsigned char *, size_t); 34169695Skanstatic void ht_expand (hash_table *); 35169695Skanstatic double approx_sqrt (double); 36169695Skan 37169695Skan/* Calculate the hash of the string STR of length LEN. */ 38169695Skan 39169695Skanstatic unsigned int 40169695Skancalc_hash (const unsigned char *str, size_t len) 41169695Skan{ 42169695Skan size_t n = len; 43169695Skan unsigned int r = 0; 44169695Skan 45169695Skan while (n--) 46169695Skan r = HT_HASHSTEP (r, *str++); 47169695Skan 48169695Skan return HT_HASHFINISH (r, len); 49169695Skan} 50169695Skan 51169695Skan/* Initialize an identifier hashtable. */ 52169695Skan 53169695Skanhash_table * 54169695Skanht_create (unsigned int order) 55169695Skan{ 56169695Skan unsigned int nslots = 1 << order; 57169695Skan hash_table *table; 58169695Skan 59169695Skan table = XCNEW (hash_table); 60169695Skan 61169695Skan /* Strings need no alignment. */ 62169695Skan _obstack_begin (&table->stack, 0, 0, 63169695Skan (void *(*) (long)) xmalloc, 64169695Skan (void (*) (void *)) free); 65169695Skan 66169695Skan obstack_alignment_mask (&table->stack) = 0; 67169695Skan 68169695Skan table->entries = XCNEWVEC (hashnode, nslots); 69169695Skan table->entries_owned = true; 70169695Skan table->nslots = nslots; 71169695Skan return table; 72169695Skan} 73169695Skan 74169695Skan/* Frees all memory associated with a hash table. */ 75169695Skan 76169695Skanvoid 77169695Skanht_destroy (hash_table *table) 78169695Skan{ 79169695Skan obstack_free (&table->stack, NULL); 80169695Skan if (table->entries_owned) 81169695Skan free (table->entries); 82169695Skan free (table); 83169695Skan} 84169695Skan 85169695Skan/* Returns the hash entry for the a STR of length LEN. If that string 86169695Skan already exists in the table, returns the existing entry, and, if 87169695Skan INSERT is CPP_ALLOCED, frees the last obstack object. If the 88169695Skan identifier hasn't been seen before, and INSERT is CPP_NO_INSERT, 89169695Skan returns NULL. Otherwise insert and returns a new entry. A new 90169695Skan string is alloced if INSERT is CPP_ALLOC, otherwise INSERT is 91169695Skan CPP_ALLOCED and the item is assumed to be at the top of the 92169695Skan obstack. */ 93169695Skanhashnode 94169695Skanht_lookup (hash_table *table, const unsigned char *str, size_t len, 95169695Skan enum ht_lookup_option insert) 96169695Skan{ 97169695Skan return ht_lookup_with_hash (table, str, len, calc_hash (str, len), 98169695Skan insert); 99169695Skan} 100169695Skan 101169695Skanhashnode 102169695Skanht_lookup_with_hash (hash_table *table, const unsigned char *str, 103169695Skan size_t len, unsigned int hash, 104169695Skan enum ht_lookup_option insert) 105169695Skan{ 106169695Skan unsigned int hash2; 107169695Skan unsigned int index; 108169695Skan size_t sizemask; 109169695Skan hashnode node; 110169695Skan 111169695Skan sizemask = table->nslots - 1; 112169695Skan index = hash & sizemask; 113169695Skan table->searches++; 114169695Skan 115169695Skan node = table->entries[index]; 116169695Skan 117169695Skan if (node != NULL) 118169695Skan { 119169695Skan if (node->hash_value == hash 120169695Skan && HT_LEN (node) == (unsigned int) len 121169695Skan && !memcmp (HT_STR (node), str, len)) 122169695Skan { 123169695Skan if (insert == HT_ALLOCED) 124169695Skan /* The string we search for was placed at the end of the 125169695Skan obstack. Release it. */ 126169695Skan obstack_free (&table->stack, (void *) str); 127169695Skan return node; 128169695Skan } 129169695Skan 130169695Skan /* hash2 must be odd, so we're guaranteed to visit every possible 131169695Skan location in the table during rehashing. */ 132169695Skan hash2 = ((hash * 17) & sizemask) | 1; 133169695Skan 134169695Skan for (;;) 135169695Skan { 136169695Skan table->collisions++; 137169695Skan index = (index + hash2) & sizemask; 138169695Skan node = table->entries[index]; 139169695Skan if (node == NULL) 140169695Skan break; 141169695Skan 142169695Skan if (node->hash_value == hash 143169695Skan && HT_LEN (node) == (unsigned int) len 144169695Skan && !memcmp (HT_STR (node), str, len)) 145169695Skan { 146169695Skan if (insert == HT_ALLOCED) 147169695Skan /* The string we search for was placed at the end of the 148169695Skan obstack. Release it. */ 149169695Skan obstack_free (&table->stack, (void *) str); 150169695Skan return node; 151169695Skan } 152169695Skan } 153169695Skan } 154169695Skan 155169695Skan if (insert == HT_NO_INSERT) 156169695Skan return NULL; 157169695Skan 158169695Skan node = (*table->alloc_node) (table); 159169695Skan table->entries[index] = node; 160169695Skan 161169695Skan HT_LEN (node) = (unsigned int) len; 162169695Skan node->hash_value = hash; 163169695Skan if (insert == HT_ALLOC) 164169695Skan HT_STR (node) = (const unsigned char *) obstack_copy0 (&table->stack, 165169695Skan str, len); 166169695Skan else 167169695Skan HT_STR (node) = str; 168169695Skan 169169695Skan if (++table->nelements * 4 >= table->nslots * 3) 170169695Skan /* Must expand the string table. */ 171169695Skan ht_expand (table); 172169695Skan 173169695Skan return node; 174169695Skan} 175169695Skan 176169695Skan/* Double the size of a hash table, re-hashing existing entries. */ 177169695Skan 178169695Skanstatic void 179169695Skanht_expand (hash_table *table) 180169695Skan{ 181169695Skan hashnode *nentries, *p, *limit; 182169695Skan unsigned int size, sizemask; 183169695Skan 184169695Skan size = table->nslots * 2; 185169695Skan nentries = XCNEWVEC (hashnode, size); 186169695Skan sizemask = size - 1; 187169695Skan 188169695Skan p = table->entries; 189169695Skan limit = p + table->nslots; 190169695Skan do 191169695Skan if (*p) 192169695Skan { 193169695Skan unsigned int index, hash, hash2; 194169695Skan 195169695Skan hash = (*p)->hash_value; 196169695Skan index = hash & sizemask; 197169695Skan 198169695Skan if (nentries[index]) 199169695Skan { 200169695Skan hash2 = ((hash * 17) & sizemask) | 1; 201169695Skan do 202169695Skan { 203169695Skan index = (index + hash2) & sizemask; 204169695Skan } 205169695Skan while (nentries[index]); 206169695Skan } 207169695Skan nentries[index] = *p; 208169695Skan } 209169695Skan while (++p < limit); 210169695Skan 211169695Skan if (table->entries_owned) 212169695Skan free (table->entries); 213169695Skan table->entries_owned = true; 214169695Skan table->entries = nentries; 215169695Skan table->nslots = size; 216169695Skan} 217169695Skan 218169695Skan/* For all nodes in TABLE, callback CB with parameters TABLE->PFILE, 219169695Skan the node, and V. */ 220169695Skanvoid 221169695Skanht_forall (hash_table *table, ht_cb cb, const void *v) 222169695Skan{ 223169695Skan hashnode *p, *limit; 224169695Skan 225169695Skan p = table->entries; 226169695Skan limit = p + table->nslots; 227169695Skan do 228169695Skan if (*p) 229169695Skan { 230169695Skan if ((*cb) (table->pfile, *p, v) == 0) 231169695Skan break; 232169695Skan } 233169695Skan while (++p < limit); 234169695Skan} 235169695Skan 236169695Skan/* Restore the hash table. */ 237169695Skanvoid 238169695Skanht_load (hash_table *ht, hashnode *entries, 239169695Skan unsigned int nslots, unsigned int nelements, 240169695Skan bool own) 241169695Skan{ 242169695Skan if (ht->entries_owned) 243169695Skan free (ht->entries); 244169695Skan ht->entries = entries; 245169695Skan ht->nslots = nslots; 246169695Skan ht->nelements = nelements; 247169695Skan ht->entries_owned = own; 248169695Skan} 249169695Skan 250169695Skan/* Dump allocation statistics to stderr. */ 251169695Skan 252169695Skanvoid 253169695Skanht_dump_statistics (hash_table *table) 254169695Skan{ 255169695Skan size_t nelts, nids, overhead, headers; 256169695Skan size_t total_bytes, longest; 257169695Skan double sum_of_squares, exp_len, exp_len2, exp2_len; 258169695Skan hashnode *p, *limit; 259169695Skan 260169695Skan#define SCALE(x) ((unsigned long) ((x) < 1024*10 \ 261169695Skan ? (x) \ 262169695Skan : ((x) < 1024*1024*10 \ 263169695Skan ? (x) / 1024 \ 264169695Skan : (x) / (1024*1024)))) 265169695Skan#define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M')) 266169695Skan 267169695Skan total_bytes = longest = sum_of_squares = nids = 0; 268169695Skan p = table->entries; 269169695Skan limit = p + table->nslots; 270169695Skan do 271169695Skan if (*p) 272169695Skan { 273169695Skan size_t n = HT_LEN (*p); 274169695Skan 275169695Skan total_bytes += n; 276169695Skan sum_of_squares += (double) n * n; 277169695Skan if (n > longest) 278169695Skan longest = n; 279169695Skan nids++; 280169695Skan } 281169695Skan while (++p < limit); 282169695Skan 283169695Skan nelts = table->nelements; 284169695Skan overhead = obstack_memory_used (&table->stack) - total_bytes; 285169695Skan headers = table->nslots * sizeof (hashnode); 286169695Skan 287169695Skan fprintf (stderr, "\nString pool\nentries\t\t%lu\n", 288169695Skan (unsigned long) nelts); 289169695Skan fprintf (stderr, "identifiers\t%lu (%.2f%%)\n", 290169695Skan (unsigned long) nids, nids * 100.0 / nelts); 291169695Skan fprintf (stderr, "slots\t\t%lu\n", 292169695Skan (unsigned long) table->nslots); 293169695Skan fprintf (stderr, "bytes\t\t%lu%c (%lu%c overhead)\n", 294169695Skan SCALE (total_bytes), LABEL (total_bytes), 295169695Skan SCALE (overhead), LABEL (overhead)); 296169695Skan fprintf (stderr, "table size\t%lu%c\n", 297169695Skan SCALE (headers), LABEL (headers)); 298169695Skan 299169695Skan exp_len = (double)total_bytes / (double)nelts; 300169695Skan exp2_len = exp_len * exp_len; 301169695Skan exp_len2 = (double) sum_of_squares / (double) nelts; 302169695Skan 303169695Skan fprintf (stderr, "coll/search\t%.4f\n", 304169695Skan (double) table->collisions / (double) table->searches); 305169695Skan fprintf (stderr, "ins/search\t%.4f\n", 306169695Skan (double) nelts / (double) table->searches); 307169695Skan fprintf (stderr, "avg. entry\t%.2f bytes (+/- %.2f)\n", 308169695Skan exp_len, approx_sqrt (exp_len2 - exp2_len)); 309169695Skan fprintf (stderr, "longest entry\t%lu\n", 310169695Skan (unsigned long) longest); 311169695Skan#undef SCALE 312169695Skan#undef LABEL 313169695Skan} 314169695Skan 315169695Skan/* Return the approximate positive square root of a number N. This is for 316169695Skan statistical reports, not code generation. */ 317169695Skanstatic double 318169695Skanapprox_sqrt (double x) 319169695Skan{ 320169695Skan double s, d; 321169695Skan 322169695Skan if (x < 0) 323169695Skan abort (); 324169695Skan if (x == 0) 325169695Skan return 0; 326169695Skan 327169695Skan s = x; 328169695Skan do 329169695Skan { 330169695Skan d = (s * s - x) / (2 * s); 331169695Skan s -= d; 332169695Skan } 333169695Skan while (d > .0001); 334169695Skan return s; 335169695Skan} 336