119370Spst/* Implement a cached obstack. 298944Sobrien Written by Fred Fish <fnf@cygnus.com> 398944Sobrien Rewritten by Jim Blandy <jimb@cygnus.com> 419370Spst 5130803Smarcel Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc. 619370Spst 798944Sobrien This file is part of GDB. 819370Spst 998944Sobrien This program is free software; you can redistribute it and/or modify 1098944Sobrien it under the terms of the GNU General Public License as published by 1198944Sobrien the Free Software Foundation; either version 2 of the License, or 1298944Sobrien (at your option) any later version. 1319370Spst 1498944Sobrien This program is distributed in the hope that it will be useful, 1598944Sobrien but WITHOUT ANY WARRANTY; without even the implied warranty of 1698944Sobrien MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1798944Sobrien GNU General Public License for more details. 1819370Spst 1998944Sobrien You should have received a copy of the GNU General Public License 2098944Sobrien along with this program; if not, write to the Free Software 2198944Sobrien Foundation, Inc., 59 Temple Place - Suite 330, 2298944Sobrien Boston, MA 02111-1307, USA. */ 2398944Sobrien 2419370Spst#include "defs.h" 25130803Smarcel#include "gdb_obstack.h" 2619370Spst#include "bcache.h" 2719370Spst#include "gdb_string.h" /* For memcpy declaration */ 28130803Smarcel#include "gdb_assert.h" 2919370Spst 3098944Sobrien#include <stddef.h> 3198944Sobrien#include <stdlib.h> 3246283Sdfr 33130803Smarcel/* The type used to hold a single bcache string. The user data is 34130803Smarcel stored in d.data. Since it can be any type, it needs to have the 35130803Smarcel same alignment as the most strict alignment of any type on the host 36130803Smarcel machine. I don't know of any really correct way to do this in 37130803Smarcel stock ANSI C, so just do it the same way obstack.h does. */ 38130803Smarcel 39130803Smarcelstruct bstring 40130803Smarcel{ 41130803Smarcel /* Hash chain. */ 42130803Smarcel struct bstring *next; 43130803Smarcel /* Assume the data length is no more than 64k. */ 44130803Smarcel unsigned short length; 45130803Smarcel /* The half hash hack. This contains the upper 16 bits of the hash 46130803Smarcel value and is used as a pre-check when comparing two strings and 47130803Smarcel avoids the need to do length or memcmp calls. It proves to be 48130803Smarcel roughly 100% effective. */ 49130803Smarcel unsigned short half_hash; 50130803Smarcel 51130803Smarcel union 52130803Smarcel { 53130803Smarcel char data[1]; 54130803Smarcel double dummy; 55130803Smarcel } 56130803Smarcel d; 57130803Smarcel}; 58130803Smarcel 59130803Smarcel 60130803Smarcel/* The structure for a bcache itself. The bcache is initialized, in 61130803Smarcel bcache_xmalloc(), by filling it with zeros and then setting the 62130803Smarcel corresponding obstack's malloc() and free() methods. */ 63130803Smarcel 64130803Smarcelstruct bcache 65130803Smarcel{ 66130803Smarcel /* All the bstrings are allocated here. */ 67130803Smarcel struct obstack cache; 68130803Smarcel 69130803Smarcel /* How many hash buckets we're using. */ 70130803Smarcel unsigned int num_buckets; 71130803Smarcel 72130803Smarcel /* Hash buckets. This table is allocated using malloc, so when we 73130803Smarcel grow the table we can return the old table to the system. */ 74130803Smarcel struct bstring **bucket; 75130803Smarcel 76130803Smarcel /* Statistics. */ 77130803Smarcel unsigned long unique_count; /* number of unique strings */ 78130803Smarcel long total_count; /* total number of strings cached, including dups */ 79130803Smarcel long unique_size; /* size of unique strings, in bytes */ 80130803Smarcel long total_size; /* total number of bytes cached, including dups */ 81130803Smarcel long structure_size; /* total size of bcache, including infrastructure */ 82130803Smarcel /* Number of times that the hash table is expanded and hence 83130803Smarcel re-built, and the corresponding number of times that a string is 84130803Smarcel [re]hashed as part of entering it into the expanded table. The 85130803Smarcel total number of hashes can be computed by adding TOTAL_COUNT to 86130803Smarcel expand_hash_count. */ 87130803Smarcel unsigned long expand_count; 88130803Smarcel unsigned long expand_hash_count; 89130803Smarcel /* Number of times that the half-hash compare hit (compare the upper 90130803Smarcel 16 bits of hash values) hit, but the corresponding combined 91130803Smarcel length/data compare missed. */ 92130803Smarcel unsigned long half_hash_miss_count; 93130803Smarcel}; 94130803Smarcel 9598944Sobrien/* The old hash function was stolen from SDBM. This is what DB 3.0 uses now, 9698944Sobrien * and is better than the old one. 9798944Sobrien */ 9898944Sobrien 9998944Sobrienunsigned long 10098944Sobrienhash(const void *addr, int length) 10198944Sobrien{ 10298944Sobrien const unsigned char *k, *e; 10398944Sobrien unsigned long h; 10498944Sobrien 10598944Sobrien k = (const unsigned char *)addr; 10698944Sobrien e = k+length; 10798944Sobrien for (h=0; k< e;++k) 10898944Sobrien { 10998944Sobrien h *=16777619; 11098944Sobrien h ^= *k; 11198944Sobrien } 11298944Sobrien return (h); 11398944Sobrien} 11498944Sobrien 11598944Sobrien/* Growing the bcache's hash table. */ 11646283Sdfr 11798944Sobrien/* If the average chain length grows beyond this, then we want to 11898944Sobrien resize our hash table. */ 11998944Sobrien#define CHAIN_LENGTH_THRESHOLD (5) 12046283Sdfr 12198944Sobrienstatic void 12298944Sobrienexpand_hash_table (struct bcache *bcache) 12319370Spst{ 12498944Sobrien /* A table of good hash table sizes. Whenever we grow, we pick the 12598944Sobrien next larger size from this table. sizes[i] is close to 1 << (i+10), 12698944Sobrien so we roughly double the table size each time. After we fall off 12798944Sobrien the end of this table, we just double. Don't laugh --- there have 12898944Sobrien been executables sighted with a gigabyte of debug info. */ 12998944Sobrien static unsigned long sizes[] = { 13098944Sobrien 1021, 2053, 4099, 8191, 16381, 32771, 13198944Sobrien 65537, 131071, 262144, 524287, 1048573, 2097143, 13298944Sobrien 4194301, 8388617, 16777213, 33554467, 67108859, 134217757, 13398944Sobrien 268435459, 536870923, 1073741827, 2147483659UL 13498944Sobrien }; 13598944Sobrien unsigned int new_num_buckets; 13698944Sobrien struct bstring **new_buckets; 13798944Sobrien unsigned int i; 13819370Spst 139130803Smarcel /* Count the stats. Every unique item needs to be re-hashed and 140130803Smarcel re-entered. */ 141130803Smarcel bcache->expand_count++; 142130803Smarcel bcache->expand_hash_count += bcache->unique_count; 143130803Smarcel 14498944Sobrien /* Find the next size. */ 14598944Sobrien new_num_buckets = bcache->num_buckets * 2; 14698944Sobrien for (i = 0; i < (sizeof (sizes) / sizeof (sizes[0])); i++) 14798944Sobrien if (sizes[i] > bcache->num_buckets) 14898944Sobrien { 14998944Sobrien new_num_buckets = sizes[i]; 15098944Sobrien break; 15198944Sobrien } 15219370Spst 15398944Sobrien /* Allocate the new table. */ 15498944Sobrien { 15598944Sobrien size_t new_size = new_num_buckets * sizeof (new_buckets[0]); 15698944Sobrien new_buckets = (struct bstring **) xmalloc (new_size); 15798944Sobrien memset (new_buckets, 0, new_size); 15819370Spst 15998944Sobrien bcache->structure_size -= (bcache->num_buckets 16098944Sobrien * sizeof (bcache->bucket[0])); 16198944Sobrien bcache->structure_size += new_size; 16298944Sobrien } 16398944Sobrien 16498944Sobrien /* Rehash all existing strings. */ 16598944Sobrien for (i = 0; i < bcache->num_buckets; i++) 16619370Spst { 16798944Sobrien struct bstring *s, *next; 16898944Sobrien 16998944Sobrien for (s = bcache->bucket[i]; s; s = next) 17019370Spst { 17198944Sobrien struct bstring **new_bucket; 17298944Sobrien next = s->next; 17398944Sobrien 17498944Sobrien new_bucket = &new_buckets[(hash (&s->d.data, s->length) 17598944Sobrien % new_num_buckets)]; 17698944Sobrien s->next = *new_bucket; 17798944Sobrien *new_bucket = s; 17819370Spst } 17919370Spst } 18098944Sobrien 18198944Sobrien /* Plug in the new table. */ 18298944Sobrien if (bcache->bucket) 18398944Sobrien xfree (bcache->bucket); 18498944Sobrien bcache->bucket = new_buckets; 18598944Sobrien bcache->num_buckets = new_num_buckets; 18619370Spst} 18719370Spst 18898944Sobrien 18998944Sobrien/* Looking up things in the bcache. */ 19098944Sobrien 19198944Sobrien/* The number of bytes needed to allocate a struct bstring whose data 19298944Sobrien is N bytes long. */ 19398944Sobrien#define BSTRING_SIZE(n) (offsetof (struct bstring, d.data) + (n)) 19498944Sobrien 19598944Sobrien/* Find a copy of the LENGTH bytes at ADDR in BCACHE. If BCACHE has 19698944Sobrien never seen those bytes before, add a copy of them to BCACHE. In 19798944Sobrien either case, return a pointer to BCACHE's copy of that string. */ 198130803Smarcelstatic void * 199130803Smarcelbcache_data (const void *addr, int length, struct bcache *bcache) 20019370Spst{ 201130803Smarcel unsigned long full_hash; 202130803Smarcel unsigned short half_hash; 20398944Sobrien int hash_index; 20498944Sobrien struct bstring *s; 20519370Spst 20698944Sobrien /* If our average chain length is too high, expand the hash table. */ 20798944Sobrien if (bcache->unique_count >= bcache->num_buckets * CHAIN_LENGTH_THRESHOLD) 20898944Sobrien expand_hash_table (bcache); 20998944Sobrien 21098944Sobrien bcache->total_count++; 21198944Sobrien bcache->total_size += length; 21298944Sobrien 213130803Smarcel full_hash = hash (addr, length); 214130803Smarcel half_hash = (full_hash >> 16); 215130803Smarcel hash_index = full_hash % bcache->num_buckets; 21698944Sobrien 217130803Smarcel /* Search the hash bucket for a string identical to the caller's. 218130803Smarcel As a short-circuit first compare the upper part of each hash 219130803Smarcel values. */ 22098944Sobrien for (s = bcache->bucket[hash_index]; s; s = s->next) 221130803Smarcel { 222130803Smarcel if (s->half_hash == half_hash) 223130803Smarcel { 224130803Smarcel if (s->length == length 225130803Smarcel && ! memcmp (&s->d.data, addr, length)) 226130803Smarcel return &s->d.data; 227130803Smarcel else 228130803Smarcel bcache->half_hash_miss_count++; 229130803Smarcel } 230130803Smarcel } 23198944Sobrien 23298944Sobrien /* The user's string isn't in the list. Insert it after *ps. */ 23398944Sobrien { 23498944Sobrien struct bstring *new 23598944Sobrien = obstack_alloc (&bcache->cache, BSTRING_SIZE (length)); 23698944Sobrien memcpy (&new->d.data, addr, length); 23798944Sobrien new->length = length; 23898944Sobrien new->next = bcache->bucket[hash_index]; 239130803Smarcel new->half_hash = half_hash; 24098944Sobrien bcache->bucket[hash_index] = new; 24198944Sobrien 24298944Sobrien bcache->unique_count++; 24398944Sobrien bcache->unique_size += length; 24498944Sobrien bcache->structure_size += BSTRING_SIZE (length); 24598944Sobrien 24698944Sobrien return &new->d.data; 24798944Sobrien } 24898944Sobrien} 24998944Sobrien 250130803Smarcelvoid * 251130803Smarceldeprecated_bcache (const void *addr, int length, struct bcache *bcache) 252130803Smarcel{ 253130803Smarcel return bcache_data (addr, length, bcache); 254130803Smarcel} 255130803Smarcel 256130803Smarcelconst void * 257130803Smarcelbcache (const void *addr, int length, struct bcache *bcache) 258130803Smarcel{ 259130803Smarcel return bcache_data (addr, length, bcache); 260130803Smarcel} 26198944Sobrien 262130803Smarcel/* Allocating and freeing bcaches. */ 26398944Sobrien 264130803Smarcelstruct bcache * 265130803Smarcelbcache_xmalloc (void) 266130803Smarcel{ 267130803Smarcel /* Allocate the bcache pre-zeroed. */ 268130803Smarcel struct bcache *b = XCALLOC (1, struct bcache); 269130803Smarcel /* We could use obstack_specify_allocation here instead, but 270130803Smarcel gdb_obstack.h specifies the allocation/deallocation 271130803Smarcel functions. */ 272130803Smarcel obstack_init (&b->cache); 273130803Smarcel return b; 274130803Smarcel} 275130803Smarcel 27698944Sobrien/* Free all the storage associated with BCACHE. */ 27798944Sobrienvoid 278130803Smarcelbcache_xfree (struct bcache *bcache) 27998944Sobrien{ 280130803Smarcel if (bcache == NULL) 281130803Smarcel return; 28298944Sobrien obstack_free (&bcache->cache, 0); 283130803Smarcel xfree (bcache->bucket); 284130803Smarcel xfree (bcache); 28598944Sobrien} 28698944Sobrien 28798944Sobrien 28898944Sobrien 28998944Sobrien/* Printing statistics. */ 29098944Sobrien 29198944Sobrienstatic int 29298944Sobriencompare_ints (const void *ap, const void *bp) 29398944Sobrien{ 29498944Sobrien /* Because we know we're comparing two ints which are positive, 29598944Sobrien there's no danger of overflow here. */ 29698944Sobrien return * (int *) ap - * (int *) bp; 29798944Sobrien} 29898944Sobrien 29998944Sobrien 30098944Sobrienstatic void 30198944Sobrienprint_percentage (int portion, int total) 30298944Sobrien{ 30398944Sobrien if (total == 0) 30498944Sobrien printf_filtered ("(not applicable)\n"); 30519370Spst else 30698944Sobrien printf_filtered ("%3d%%\n", portion * 100 / total); 30719370Spst} 30819370Spst 30919370Spst 31098944Sobrien/* Print statistics on BCACHE's memory usage and efficacity at 31198944Sobrien eliminating duplication. NAME should describe the kind of data 31298944Sobrien BCACHE holds. Statistics are printed using `printf_filtered' and 31398944Sobrien its ilk. */ 31419370Spstvoid 31598944Sobrienprint_bcache_statistics (struct bcache *c, char *type) 31619370Spst{ 31798944Sobrien int occupied_buckets; 31898944Sobrien int max_chain_length; 31998944Sobrien int median_chain_length; 320130803Smarcel int max_entry_size; 321130803Smarcel int median_entry_size; 32219370Spst 323130803Smarcel /* Count the number of occupied buckets, tally the various string 324130803Smarcel lengths, and measure chain lengths. */ 32598944Sobrien { 32698944Sobrien unsigned int b; 327130803Smarcel int *chain_length = XCALLOC (c->num_buckets + 1, int); 328130803Smarcel int *entry_size = XCALLOC (c->unique_count + 1, int); 329130803Smarcel int stringi = 0; 33098944Sobrien 33198944Sobrien occupied_buckets = 0; 33298944Sobrien 33398944Sobrien for (b = 0; b < c->num_buckets; b++) 33498944Sobrien { 33598944Sobrien struct bstring *s = c->bucket[b]; 33698944Sobrien 33798944Sobrien chain_length[b] = 0; 33898944Sobrien 33998944Sobrien if (s) 34098944Sobrien { 34198944Sobrien occupied_buckets++; 34298944Sobrien 34398944Sobrien while (s) 34498944Sobrien { 345130803Smarcel gdb_assert (b < c->num_buckets); 34698944Sobrien chain_length[b]++; 347130803Smarcel gdb_assert (stringi < c->unique_count); 348130803Smarcel entry_size[stringi++] = s->length; 34998944Sobrien s = s->next; 35098944Sobrien } 35198944Sobrien } 35298944Sobrien } 35398944Sobrien 35498944Sobrien /* To compute the median, we need the set of chain lengths sorted. */ 35598944Sobrien qsort (chain_length, c->num_buckets, sizeof (chain_length[0]), 35698944Sobrien compare_ints); 357130803Smarcel qsort (entry_size, c->unique_count, sizeof (entry_size[0]), 358130803Smarcel compare_ints); 35998944Sobrien 36098944Sobrien if (c->num_buckets > 0) 36198944Sobrien { 36298944Sobrien max_chain_length = chain_length[c->num_buckets - 1]; 36398944Sobrien median_chain_length = chain_length[c->num_buckets / 2]; 36498944Sobrien } 36598944Sobrien else 36698944Sobrien { 36798944Sobrien max_chain_length = 0; 36898944Sobrien median_chain_length = 0; 36998944Sobrien } 370130803Smarcel if (c->unique_count > 0) 371130803Smarcel { 372130803Smarcel max_entry_size = entry_size[c->unique_count - 1]; 373130803Smarcel median_entry_size = entry_size[c->unique_count / 2]; 374130803Smarcel } 375130803Smarcel else 376130803Smarcel { 377130803Smarcel max_entry_size = 0; 378130803Smarcel median_entry_size = 0; 379130803Smarcel } 380130803Smarcel 381130803Smarcel xfree (chain_length); 382130803Smarcel xfree (entry_size); 38398944Sobrien } 38498944Sobrien 38598944Sobrien printf_filtered (" Cached '%s' statistics:\n", type); 38698944Sobrien printf_filtered (" Total object count: %ld\n", c->total_count); 38798944Sobrien printf_filtered (" Unique object count: %lu\n", c->unique_count); 38898944Sobrien printf_filtered (" Percentage of duplicates, by count: "); 38998944Sobrien print_percentage (c->total_count - c->unique_count, c->total_count); 39098944Sobrien printf_filtered ("\n"); 39198944Sobrien 39298944Sobrien printf_filtered (" Total object size: %ld\n", c->total_size); 39398944Sobrien printf_filtered (" Unique object size: %ld\n", c->unique_size); 39498944Sobrien printf_filtered (" Percentage of duplicates, by size: "); 39598944Sobrien print_percentage (c->total_size - c->unique_size, c->total_size); 39698944Sobrien printf_filtered ("\n"); 39798944Sobrien 398130803Smarcel printf_filtered (" Max entry size: %d\n", max_entry_size); 399130803Smarcel printf_filtered (" Average entry size: "); 400130803Smarcel if (c->unique_count > 0) 401130803Smarcel printf_filtered ("%ld\n", c->unique_size / c->unique_count); 402130803Smarcel else 403130803Smarcel printf_filtered ("(not applicable)\n"); 404130803Smarcel printf_filtered (" Median entry size: %d\n", median_entry_size); 405130803Smarcel printf_filtered ("\n"); 406130803Smarcel 40798944Sobrien printf_filtered (" Total memory used by bcache, including overhead: %ld\n", 40898944Sobrien c->structure_size); 40998944Sobrien printf_filtered (" Percentage memory overhead: "); 41098944Sobrien print_percentage (c->structure_size - c->unique_size, c->unique_size); 41198944Sobrien printf_filtered (" Net memory savings: "); 41298944Sobrien print_percentage (c->total_size - c->structure_size, c->total_size); 41398944Sobrien printf_filtered ("\n"); 41498944Sobrien 41598944Sobrien printf_filtered (" Hash table size: %3d\n", c->num_buckets); 416130803Smarcel printf_filtered (" Hash table expands: %lu\n", 417130803Smarcel c->expand_count); 418130803Smarcel printf_filtered (" Hash table hashes: %lu\n", 419130803Smarcel c->total_count + c->expand_hash_count); 420130803Smarcel printf_filtered (" Half hash misses: %lu\n", 421130803Smarcel c->half_hash_miss_count); 42298944Sobrien printf_filtered (" Hash table population: "); 42398944Sobrien print_percentage (occupied_buckets, c->num_buckets); 42498944Sobrien printf_filtered (" Median hash chain length: %3d\n", 42598944Sobrien median_chain_length); 42698944Sobrien printf_filtered (" Average hash chain length: "); 42798944Sobrien if (c->num_buckets > 0) 42898944Sobrien printf_filtered ("%3lu\n", c->unique_count / c->num_buckets); 42946283Sdfr else 43098944Sobrien printf_filtered ("(not applicable)\n"); 43198944Sobrien printf_filtered (" Maximum hash chain length: %3d\n", max_chain_length); 43298944Sobrien printf_filtered ("\n"); 43319370Spst} 434130803Smarcel 435130803Smarcelint 436130803Smarcelbcache_memory_used (struct bcache *bcache) 437130803Smarcel{ 438130803Smarcel return obstack_memory_used (&bcache->cache); 439130803Smarcel} 440