11590Srgrimes/* Implement a cached obstack. 21590Srgrimes Written by Fred Fish <fnf@cygnus.com> 31590Srgrimes Rewritten by Jim Blandy <jimb@cygnus.com> 41590Srgrimes 51590Srgrimes Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc. 61590Srgrimes 71590Srgrimes This file is part of GDB. 81590Srgrimes 91590Srgrimes This program is free software; you can redistribute it and/or modify 101590Srgrimes it under the terms of the GNU General Public License as published by 111590Srgrimes the Free Software Foundation; either version 2 of the License, or 121590Srgrimes (at your option) any later version. 131590Srgrimes 141590Srgrimes This program is distributed in the hope that it will be useful, 151590Srgrimes but WITHOUT ANY WARRANTY; without even the implied warranty of 161590Srgrimes MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 171590Srgrimes GNU General Public License for more details. 181590Srgrimes 191590Srgrimes You should have received a copy of the GNU General Public License 201590Srgrimes along with this program; if not, write to the Free Software 211590Srgrimes Foundation, Inc., 59 Temple Place - Suite 330, 221590Srgrimes Boston, MA 02111-1307, USA. */ 231590Srgrimes 241590Srgrimes#include "defs.h" 251590Srgrimes#include "gdb_obstack.h" 261590Srgrimes#include "bcache.h" 271590Srgrimes#include "gdb_string.h" /* For memcpy declaration */ 281590Srgrimes#include "gdb_assert.h" 291590Srgrimes 301590Srgrimes#include <stddef.h> 311590Srgrimes#include <stdlib.h> 321590Srgrimes 331590Srgrimes/* The type used to hold a single bcache string. The user data is 341590Srgrimes stored in d.data. Since it can be any type, it needs to have the 351590Srgrimes same alignment as the most strict alignment of any type on the host 361590Srgrimes machine. I don't know of any really correct way to do this in 371590Srgrimes stock ANSI C, so just do it the same way obstack.h does. */ 381590Srgrimes 391590Srgrimesstruct bstring 401590Srgrimes{ 411590Srgrimes /* Hash chain. */ 421590Srgrimes struct bstring *next; 431590Srgrimes /* Assume the data length is no more than 64k. */ 441590Srgrimes unsigned short length; 451590Srgrimes /* The half hash hack. This contains the upper 16 bits of the hash 461590Srgrimes value and is used as a pre-check when comparing two strings and 471590Srgrimes avoids the need to do length or memcmp calls. It proves to be 481590Srgrimes roughly 100% effective. */ 491590Srgrimes unsigned short half_hash; 501590Srgrimes 511590Srgrimes union 521590Srgrimes { 531590Srgrimes char data[1]; 541590Srgrimes double dummy; 551590Srgrimes } 561590Srgrimes d; 571590Srgrimes}; 581590Srgrimes 591590Srgrimes 601590Srgrimes/* The structure for a bcache itself. The bcache is initialized, in 611590Srgrimes bcache_xmalloc(), by filling it with zeros and then setting the 621590Srgrimes corresponding obstack's malloc() and free() methods. */ 631590Srgrimes 641590Srgrimesstruct bcache 651590Srgrimes{ 661590Srgrimes /* All the bstrings are allocated here. */ 671590Srgrimes struct obstack cache; 681590Srgrimes 691590Srgrimes /* How many hash buckets we're using. */ 701590Srgrimes unsigned int num_buckets; 711590Srgrimes 721590Srgrimes /* Hash buckets. This table is allocated using malloc, so when we 731590Srgrimes grow the table we can return the old table to the system. */ 741590Srgrimes struct bstring **bucket; 751590Srgrimes 761590Srgrimes /* Statistics. */ 771590Srgrimes unsigned long unique_count; /* number of unique strings */ 781590Srgrimes long total_count; /* total number of strings cached, including dups */ 791590Srgrimes long unique_size; /* size of unique strings, in bytes */ 801590Srgrimes long total_size; /* total number of bytes cached, including dups */ 811590Srgrimes long structure_size; /* total size of bcache, including infrastructure */ 821590Srgrimes /* Number of times that the hash table is expanded and hence 831590Srgrimes re-built, and the corresponding number of times that a string is 841590Srgrimes [re]hashed as part of entering it into the expanded table. The 851590Srgrimes total number of hashes can be computed by adding TOTAL_COUNT to 861590Srgrimes expand_hash_count. */ 871590Srgrimes unsigned long expand_count; 881590Srgrimes unsigned long expand_hash_count; 891590Srgrimes /* Number of times that the half-hash compare hit (compare the upper 901590Srgrimes 16 bits of hash values) hit, but the corresponding combined 911590Srgrimes length/data compare missed. */ 921590Srgrimes unsigned long half_hash_miss_count; 931590Srgrimes}; 941590Srgrimes 951590Srgrimes/* The old hash function was stolen from SDBM. This is what DB 3.0 uses now, 961590Srgrimes * and is better than the old one. 971590Srgrimes */ 981590Srgrimes 991590Srgrimesunsigned long 1001590Srgrimeshash(const void *addr, int length) 1011590Srgrimes{ 1021590Srgrimes const unsigned char *k, *e; 1031590Srgrimes unsigned long h; 1041590Srgrimes 1051590Srgrimes k = (const unsigned char *)addr; 1061590Srgrimes e = k+length; 1071590Srgrimes for (h=0; k< e;++k) 1081590Srgrimes { 1091590Srgrimes h *=16777619; 1101590Srgrimes h ^= *k; 1111590Srgrimes } 1121590Srgrimes return (h); 1131590Srgrimes} 1141590Srgrimes 1151590Srgrimes/* Growing the bcache's hash table. */ 1161590Srgrimes 1171590Srgrimes/* If the average chain length grows beyond this, then we want to 1181590Srgrimes resize our hash table. */ 1191590Srgrimes#define CHAIN_LENGTH_THRESHOLD (5) 1201590Srgrimes 1211590Srgrimesstatic void 1221590Srgrimesexpand_hash_table (struct bcache *bcache) 1231590Srgrimes{ 1241590Srgrimes /* A table of good hash table sizes. Whenever we grow, we pick the 1251590Srgrimes next larger size from this table. sizes[i] is close to 1 << (i+10), 1261590Srgrimes so we roughly double the table size each time. After we fall off 1271590Srgrimes the end of this table, we just double. Don't laugh --- there have 1281590Srgrimes been executables sighted with a gigabyte of debug info. */ 1291590Srgrimes static unsigned long sizes[] = { 1301590Srgrimes 1021, 2053, 4099, 8191, 16381, 32771, 1311590Srgrimes 65537, 131071, 262144, 524287, 1048573, 2097143, 1321590Srgrimes 4194301, 8388617, 16777213, 33554467, 67108859, 134217757, 1331590Srgrimes 268435459, 536870923, 1073741827, 2147483659UL 1341590Srgrimes }; 1351590Srgrimes unsigned int new_num_buckets; 1361590Srgrimes struct bstring **new_buckets; 1371590Srgrimes unsigned int i; 1381590Srgrimes 1391590Srgrimes /* Count the stats. Every unique item needs to be re-hashed and 1401590Srgrimes re-entered. */ 1411590Srgrimes bcache->expand_count++; 1421590Srgrimes bcache->expand_hash_count += bcache->unique_count; 1431590Srgrimes 1441590Srgrimes /* Find the next size. */ 1451590Srgrimes new_num_buckets = bcache->num_buckets * 2; 1461590Srgrimes for (i = 0; i < (sizeof (sizes) / sizeof (sizes[0])); i++) 1471590Srgrimes if (sizes[i] > bcache->num_buckets) 1481590Srgrimes { 1491590Srgrimes new_num_buckets = sizes[i]; 1501590Srgrimes break; 1511590Srgrimes } 1521590Srgrimes 1531590Srgrimes /* Allocate the new table. */ 1541590Srgrimes { 1551590Srgrimes size_t new_size = new_num_buckets * sizeof (new_buckets[0]); 1561590Srgrimes new_buckets = (struct bstring **) xmalloc (new_size); 1571590Srgrimes memset (new_buckets, 0, new_size); 1581590Srgrimes 1591590Srgrimes bcache->structure_size -= (bcache->num_buckets 1601590Srgrimes * sizeof (bcache->bucket[0])); 1611590Srgrimes bcache->structure_size += new_size; 1621590Srgrimes } 1631590Srgrimes 1641590Srgrimes /* Rehash all existing strings. */ 1651590Srgrimes for (i = 0; i < bcache->num_buckets; i++) 1661590Srgrimes { 1671590Srgrimes struct bstring *s, *next; 1681590Srgrimes 1691590Srgrimes for (s = bcache->bucket[i]; s; s = next) 1701590Srgrimes { 1711590Srgrimes struct bstring **new_bucket; 1721590Srgrimes next = s->next; 1731590Srgrimes 1741590Srgrimes new_bucket = &new_buckets[(hash (&s->d.data, s->length) 1751590Srgrimes % new_num_buckets)]; 1761590Srgrimes s->next = *new_bucket; 1771590Srgrimes *new_bucket = s; 1781590Srgrimes } 1791590Srgrimes } 1801590Srgrimes 1811590Srgrimes /* Plug in the new table. */ 1821590Srgrimes if (bcache->bucket) 1831590Srgrimes xfree (bcache->bucket); 1841590Srgrimes bcache->bucket = new_buckets; 1851590Srgrimes bcache->num_buckets = new_num_buckets; 1861590Srgrimes} 1871590Srgrimes 1881590Srgrimes 1891590Srgrimes/* Looking up things in the bcache. */ 1901590Srgrimes 1911590Srgrimes/* The number of bytes needed to allocate a struct bstring whose data 1921590Srgrimes is N bytes long. */ 1931590Srgrimes#define BSTRING_SIZE(n) (offsetof (struct bstring, d.data) + (n)) 1941590Srgrimes 1951590Srgrimes/* Find a copy of the LENGTH bytes at ADDR in BCACHE. If BCACHE has 1961590Srgrimes never seen those bytes before, add a copy of them to BCACHE. In 1971590Srgrimes either case, return a pointer to BCACHE's copy of that string. */ 1981590Srgrimesstatic void * 1991590Srgrimesbcache_data (const void *addr, int length, struct bcache *bcache) 2001590Srgrimes{ 2011590Srgrimes unsigned long full_hash; 2021590Srgrimes unsigned short half_hash; 2031590Srgrimes int hash_index; 2041590Srgrimes struct bstring *s; 2051590Srgrimes 2061590Srgrimes /* If our average chain length is too high, expand the hash table. */ 2071590Srgrimes if (bcache->unique_count >= bcache->num_buckets * CHAIN_LENGTH_THRESHOLD) 2081590Srgrimes expand_hash_table (bcache); 2091590Srgrimes 2101590Srgrimes bcache->total_count++; 2111590Srgrimes bcache->total_size += length; 2121590Srgrimes 2131590Srgrimes full_hash = hash (addr, length); 2141590Srgrimes half_hash = (full_hash >> 16); 2151590Srgrimes hash_index = full_hash % bcache->num_buckets; 2161590Srgrimes 2171590Srgrimes /* Search the hash bucket for a string identical to the caller's. 2181590Srgrimes As a short-circuit first compare the upper part of each hash 2191590Srgrimes values. */ 2201590Srgrimes for (s = bcache->bucket[hash_index]; s; s = s->next) 2211590Srgrimes { 2221590Srgrimes if (s->half_hash == half_hash) 2231590Srgrimes { 2241590Srgrimes if (s->length == length 2251590Srgrimes && ! memcmp (&s->d.data, addr, length)) 2261590Srgrimes return &s->d.data; 2271590Srgrimes else 2281590Srgrimes bcache->half_hash_miss_count++; 2291590Srgrimes } 2301590Srgrimes } 2311590Srgrimes 2321590Srgrimes /* The user's string isn't in the list. Insert it after *ps. */ 2331590Srgrimes { 2341590Srgrimes struct bstring *new 2351590Srgrimes = obstack_alloc (&bcache->cache, BSTRING_SIZE (length)); 2361590Srgrimes memcpy (&new->d.data, addr, length); 2371590Srgrimes new->length = length; 2381590Srgrimes new->next = bcache->bucket[hash_index]; 2391590Srgrimes new->half_hash = half_hash; 2401590Srgrimes bcache->bucket[hash_index] = new; 2411590Srgrimes 2421590Srgrimes bcache->unique_count++; 2431590Srgrimes bcache->unique_size += length; 2441590Srgrimes bcache->structure_size += BSTRING_SIZE (length); 2451590Srgrimes 2461590Srgrimes return &new->d.data; 2471590Srgrimes } 2481590Srgrimes} 2491590Srgrimes 2501590Srgrimesvoid * 2511590Srgrimesdeprecated_bcache (const void *addr, int length, struct bcache *bcache) 2521590Srgrimes{ 2531590Srgrimes return bcache_data (addr, length, bcache); 2541590Srgrimes} 2551590Srgrimes 2561590Srgrimesconst void * 2571590Srgrimesbcache (const void *addr, int length, struct bcache *bcache) 2581590Srgrimes{ 2591590Srgrimes return bcache_data (addr, length, bcache); 2601590Srgrimes} 2611590Srgrimes 2621590Srgrimes/* Allocating and freeing bcaches. */ 2631590Srgrimes 2641590Srgrimesstruct bcache * 2651590Srgrimesbcache_xmalloc (void) 2661590Srgrimes{ 2671590Srgrimes /* Allocate the bcache pre-zeroed. */ 2681590Srgrimes struct bcache *b = XCALLOC (1, struct bcache); 2691590Srgrimes /* We could use obstack_specify_allocation here instead, but 2701590Srgrimes gdb_obstack.h specifies the allocation/deallocation 2711590Srgrimes functions. */ 2721590Srgrimes obstack_init (&b->cache); 2731590Srgrimes return b; 2741590Srgrimes} 2751590Srgrimes 2761590Srgrimes/* Free all the storage associated with BCACHE. */ 2771590Srgrimesvoid 2781590Srgrimesbcache_xfree (struct bcache *bcache) 2791590Srgrimes{ 2801590Srgrimes if (bcache == NULL) 2811590Srgrimes return; 2821590Srgrimes obstack_free (&bcache->cache, 0); 2831590Srgrimes xfree (bcache->bucket); 2841590Srgrimes xfree (bcache); 2851590Srgrimes} 2861590Srgrimes 2871590Srgrimes 2881590Srgrimes 2891590Srgrimes/* Printing statistics. */ 2901590Srgrimes 2911590Srgrimesstatic int 2921590Srgrimescompare_ints (const void *ap, const void *bp) 2931590Srgrimes{ 2941590Srgrimes /* Because we know we're comparing two ints which are positive, 2951590Srgrimes there's no danger of overflow here. */ 2961590Srgrimes return * (int *) ap - * (int *) bp; 2971590Srgrimes} 2981590Srgrimes 2991590Srgrimes 3001590Srgrimesstatic void 3011590Srgrimesprint_percentage (int portion, int total) 3021590Srgrimes{ 3031590Srgrimes if (total == 0) 3041590Srgrimes printf_filtered ("(not applicable)\n"); 3051590Srgrimes else 3061590Srgrimes printf_filtered ("%3d%%\n", portion * 100 / total); 3071590Srgrimes} 3081590Srgrimes 3091590Srgrimes 3101590Srgrimes/* Print statistics on BCACHE's memory usage and efficacity at 3111590Srgrimes eliminating duplication. NAME should describe the kind of data 3121590Srgrimes BCACHE holds. Statistics are printed using `printf_filtered' and 3131590Srgrimes its ilk. */ 3141590Srgrimesvoid 3151590Srgrimesprint_bcache_statistics (struct bcache *c, char *type) 3161590Srgrimes{ 3171590Srgrimes int occupied_buckets; 3181590Srgrimes int max_chain_length; 3191590Srgrimes int median_chain_length; 3201590Srgrimes int max_entry_size; 3211590Srgrimes int median_entry_size; 3221590Srgrimes 3231590Srgrimes /* Count the number of occupied buckets, tally the various string 3241590Srgrimes lengths, and measure chain lengths. */ 3251590Srgrimes { 3261590Srgrimes unsigned int b; 3271590Srgrimes int *chain_length = XCALLOC (c->num_buckets + 1, int); 3281590Srgrimes int *entry_size = XCALLOC (c->unique_count + 1, int); 3291590Srgrimes int stringi = 0; 3301590Srgrimes 3311590Srgrimes occupied_buckets = 0; 3321590Srgrimes 3331590Srgrimes for (b = 0; b < c->num_buckets; b++) 3341590Srgrimes { 3351590Srgrimes struct bstring *s = c->bucket[b]; 3361590Srgrimes 3371590Srgrimes chain_length[b] = 0; 3381590Srgrimes 3391590Srgrimes if (s) 3401590Srgrimes { 3411590Srgrimes occupied_buckets++; 3421590Srgrimes 3431590Srgrimes while (s) 3441590Srgrimes { 3451590Srgrimes gdb_assert (b < c->num_buckets); 3461590Srgrimes chain_length[b]++; 3471590Srgrimes gdb_assert (stringi < c->unique_count); 3481590Srgrimes entry_size[stringi++] = s->length; 3491590Srgrimes s = s->next; 3501590Srgrimes } 3511590Srgrimes } 3521590Srgrimes } 3531590Srgrimes 3541590Srgrimes /* To compute the median, we need the set of chain lengths sorted. */ 3551590Srgrimes qsort (chain_length, c->num_buckets, sizeof (chain_length[0]), 3561590Srgrimes compare_ints); 3571590Srgrimes qsort (entry_size, c->unique_count, sizeof (entry_size[0]), 3581590Srgrimes compare_ints); 3591590Srgrimes 3601590Srgrimes if (c->num_buckets > 0) 3611590Srgrimes { 3621590Srgrimes max_chain_length = chain_length[c->num_buckets - 1]; 3631590Srgrimes median_chain_length = chain_length[c->num_buckets / 2]; 3641590Srgrimes } 3651590Srgrimes else 3661590Srgrimes { 3671590Srgrimes max_chain_length = 0; 3681590Srgrimes median_chain_length = 0; 3691590Srgrimes } 3701590Srgrimes if (c->unique_count > 0) 3711590Srgrimes { 3721590Srgrimes max_entry_size = entry_size[c->unique_count - 1]; 3731590Srgrimes median_entry_size = entry_size[c->unique_count / 2]; 3741590Srgrimes } 3751590Srgrimes else 3761590Srgrimes { 3771590Srgrimes max_entry_size = 0; 3781590Srgrimes median_entry_size = 0; 3791590Srgrimes } 3801590Srgrimes 3811590Srgrimes xfree (chain_length); 3821590Srgrimes xfree (entry_size); 3831590Srgrimes } 3841590Srgrimes 3851590Srgrimes printf_filtered (" Cached '%s' statistics:\n", type); 3861590Srgrimes printf_filtered (" Total object count: %ld\n", c->total_count); 3871590Srgrimes printf_filtered (" Unique object count: %lu\n", c->unique_count); 3881590Srgrimes printf_filtered (" Percentage of duplicates, by count: "); 3891590Srgrimes print_percentage (c->total_count - c->unique_count, c->total_count); 3901590Srgrimes printf_filtered ("\n"); 3911590Srgrimes 3921590Srgrimes printf_filtered (" Total object size: %ld\n", c->total_size); 3931590Srgrimes printf_filtered (" Unique object size: %ld\n", c->unique_size); 3941590Srgrimes printf_filtered (" Percentage of duplicates, by size: "); 3951590Srgrimes print_percentage (c->total_size - c->unique_size, c->total_size); 3961590Srgrimes printf_filtered ("\n"); 3971590Srgrimes 3981590Srgrimes printf_filtered (" Max entry size: %d\n", max_entry_size); 3991590Srgrimes printf_filtered (" Average entry size: "); 4001590Srgrimes if (c->unique_count > 0) 4011590Srgrimes printf_filtered ("%ld\n", c->unique_size / c->unique_count); 4021590Srgrimes else 4031590Srgrimes printf_filtered ("(not applicable)\n"); 4041590Srgrimes printf_filtered (" Median entry size: %d\n", median_entry_size); 4051590Srgrimes printf_filtered ("\n"); 4061590Srgrimes 4071590Srgrimes printf_filtered (" Total memory used by bcache, including overhead: %ld\n", 4081590Srgrimes c->structure_size); 4091590Srgrimes printf_filtered (" Percentage memory overhead: "); 4101590Srgrimes print_percentage (c->structure_size - c->unique_size, c->unique_size); 4111590Srgrimes printf_filtered (" Net memory savings: "); 4121590Srgrimes print_percentage (c->total_size - c->structure_size, c->total_size); 4131590Srgrimes printf_filtered ("\n"); 4141590Srgrimes 4151590Srgrimes printf_filtered (" Hash table size: %3d\n", c->num_buckets); 4161590Srgrimes printf_filtered (" Hash table expands: %lu\n", 4171590Srgrimes c->expand_count); 4181590Srgrimes printf_filtered (" Hash table hashes: %lu\n", 4191590Srgrimes c->total_count + c->expand_hash_count); 4201590Srgrimes printf_filtered (" Half hash misses: %lu\n", 4211590Srgrimes c->half_hash_miss_count); 4221590Srgrimes printf_filtered (" Hash table population: "); 4231590Srgrimes print_percentage (occupied_buckets, c->num_buckets); 4241590Srgrimes printf_filtered (" Median hash chain length: %3d\n", 4251590Srgrimes median_chain_length); 4261590Srgrimes printf_filtered (" Average hash chain length: "); 4271590Srgrimes if (c->num_buckets > 0) 4281590Srgrimes printf_filtered ("%3lu\n", c->unique_count / c->num_buckets); 4291590Srgrimes else 4301590Srgrimes printf_filtered ("(not applicable)\n"); 4311590Srgrimes printf_filtered (" Maximum hash chain length: %3d\n", max_chain_length); 4321590Srgrimes printf_filtered ("\n"); 4331590Srgrimes} 4341590Srgrimes 4351590Srgrimesint 4361590Srgrimesbcache_memory_used (struct bcache *bcache) 4371590Srgrimes{ 4381590Srgrimes return obstack_memory_used (&bcache->cache); 4391590Srgrimes} 4401590Srgrimes