11590Srgrimes/* Implement a cached obstack.
21590Srgrimes   Written by Fred Fish <fnf@cygnus.com>
31590Srgrimes   Rewritten by Jim Blandy <jimb@cygnus.com>
41590Srgrimes
51590Srgrimes   Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
61590Srgrimes
71590Srgrimes   This file is part of GDB.
81590Srgrimes
91590Srgrimes   This program is free software; you can redistribute it and/or modify
101590Srgrimes   it under the terms of the GNU General Public License as published by
111590Srgrimes   the Free Software Foundation; either version 2 of the License, or
121590Srgrimes   (at your option) any later version.
131590Srgrimes
141590Srgrimes   This program is distributed in the hope that it will be useful,
151590Srgrimes   but WITHOUT ANY WARRANTY; without even the implied warranty of
161590Srgrimes   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
171590Srgrimes   GNU General Public License for more details.
181590Srgrimes
191590Srgrimes   You should have received a copy of the GNU General Public License
201590Srgrimes   along with this program; if not, write to the Free Software
211590Srgrimes   Foundation, Inc., 59 Temple Place - Suite 330,
221590Srgrimes   Boston, MA 02111-1307, USA.  */
231590Srgrimes
241590Srgrimes#include "defs.h"
251590Srgrimes#include "gdb_obstack.h"
261590Srgrimes#include "bcache.h"
271590Srgrimes#include "gdb_string.h"		/* For memcpy declaration */
281590Srgrimes#include "gdb_assert.h"
291590Srgrimes
301590Srgrimes#include <stddef.h>
311590Srgrimes#include <stdlib.h>
321590Srgrimes
331590Srgrimes/* The type used to hold a single bcache string.  The user data is
341590Srgrimes   stored in d.data.  Since it can be any type, it needs to have the
351590Srgrimes   same alignment as the most strict alignment of any type on the host
361590Srgrimes   machine.  I don't know of any really correct way to do this in
371590Srgrimes   stock ANSI C, so just do it the same way obstack.h does.  */
381590Srgrimes
391590Srgrimesstruct bstring
401590Srgrimes{
411590Srgrimes  /* Hash chain.  */
421590Srgrimes  struct bstring *next;
431590Srgrimes  /* Assume the data length is no more than 64k.  */
441590Srgrimes  unsigned short length;
451590Srgrimes  /* The half hash hack.  This contains the upper 16 bits of the hash
461590Srgrimes     value and is used as a pre-check when comparing two strings and
471590Srgrimes     avoids the need to do length or memcmp calls.  It proves to be
481590Srgrimes     roughly 100% effective.  */
491590Srgrimes  unsigned short half_hash;
501590Srgrimes
511590Srgrimes  union
521590Srgrimes  {
531590Srgrimes    char data[1];
541590Srgrimes    double dummy;
551590Srgrimes  }
561590Srgrimes  d;
571590Srgrimes};
581590Srgrimes
591590Srgrimes
601590Srgrimes/* The structure for a bcache itself.  The bcache is initialized, in
611590Srgrimes   bcache_xmalloc(), by filling it with zeros and then setting the
621590Srgrimes   corresponding obstack's malloc() and free() methods.  */
631590Srgrimes
641590Srgrimesstruct bcache
651590Srgrimes{
661590Srgrimes  /* All the bstrings are allocated here.  */
671590Srgrimes  struct obstack cache;
681590Srgrimes
691590Srgrimes  /* How many hash buckets we're using.  */
701590Srgrimes  unsigned int num_buckets;
711590Srgrimes
721590Srgrimes  /* Hash buckets.  This table is allocated using malloc, so when we
731590Srgrimes     grow the table we can return the old table to the system.  */
741590Srgrimes  struct bstring **bucket;
751590Srgrimes
761590Srgrimes  /* Statistics.  */
771590Srgrimes  unsigned long unique_count;	/* number of unique strings */
781590Srgrimes  long total_count;	/* total number of strings cached, including dups */
791590Srgrimes  long unique_size;	/* size of unique strings, in bytes */
801590Srgrimes  long total_size;      /* total number of bytes cached, including dups */
811590Srgrimes  long structure_size;	/* total size of bcache, including infrastructure */
821590Srgrimes  /* Number of times that the hash table is expanded and hence
831590Srgrimes     re-built, and the corresponding number of times that a string is
841590Srgrimes     [re]hashed as part of entering it into the expanded table.  The
851590Srgrimes     total number of hashes can be computed by adding TOTAL_COUNT to
861590Srgrimes     expand_hash_count.  */
871590Srgrimes  unsigned long expand_count;
881590Srgrimes  unsigned long expand_hash_count;
891590Srgrimes  /* Number of times that the half-hash compare hit (compare the upper
901590Srgrimes     16 bits of hash values) hit, but the corresponding combined
911590Srgrimes     length/data compare missed.  */
921590Srgrimes  unsigned long half_hash_miss_count;
931590Srgrimes};
941590Srgrimes
951590Srgrimes/* The old hash function was stolen from SDBM. This is what DB 3.0 uses now,
961590Srgrimes * and is better than the old one.
971590Srgrimes */
981590Srgrimes
991590Srgrimesunsigned long
1001590Srgrimeshash(const void *addr, int length)
1011590Srgrimes{
1021590Srgrimes		const unsigned char *k, *e;
1031590Srgrimes		unsigned long h;
1041590Srgrimes
1051590Srgrimes		k = (const unsigned char *)addr;
1061590Srgrimes		e = k+length;
1071590Srgrimes		for (h=0; k< e;++k)
1081590Srgrimes		{
1091590Srgrimes				h *=16777619;
1101590Srgrimes				h ^= *k;
1111590Srgrimes		}
1121590Srgrimes		return (h);
1131590Srgrimes}
1141590Srgrimes
1151590Srgrimes/* Growing the bcache's hash table.  */
1161590Srgrimes
1171590Srgrimes/* If the average chain length grows beyond this, then we want to
1181590Srgrimes   resize our hash table.  */
1191590Srgrimes#define CHAIN_LENGTH_THRESHOLD (5)
1201590Srgrimes
1211590Srgrimesstatic void
1221590Srgrimesexpand_hash_table (struct bcache *bcache)
1231590Srgrimes{
1241590Srgrimes  /* A table of good hash table sizes.  Whenever we grow, we pick the
1251590Srgrimes     next larger size from this table.  sizes[i] is close to 1 << (i+10),
1261590Srgrimes     so we roughly double the table size each time.  After we fall off
1271590Srgrimes     the end of this table, we just double.  Don't laugh --- there have
1281590Srgrimes     been executables sighted with a gigabyte of debug info.  */
1291590Srgrimes  static unsigned long sizes[] = {
1301590Srgrimes    1021, 2053, 4099, 8191, 16381, 32771,
1311590Srgrimes    65537, 131071, 262144, 524287, 1048573, 2097143,
1321590Srgrimes    4194301, 8388617, 16777213, 33554467, 67108859, 134217757,
1331590Srgrimes    268435459, 536870923, 1073741827, 2147483659UL
1341590Srgrimes  };
1351590Srgrimes  unsigned int new_num_buckets;
1361590Srgrimes  struct bstring **new_buckets;
1371590Srgrimes  unsigned int i;
1381590Srgrimes
1391590Srgrimes  /* Count the stats.  Every unique item needs to be re-hashed and
1401590Srgrimes     re-entered.  */
1411590Srgrimes  bcache->expand_count++;
1421590Srgrimes  bcache->expand_hash_count += bcache->unique_count;
1431590Srgrimes
1441590Srgrimes  /* Find the next size.  */
1451590Srgrimes  new_num_buckets = bcache->num_buckets * 2;
1461590Srgrimes  for (i = 0; i < (sizeof (sizes) / sizeof (sizes[0])); i++)
1471590Srgrimes    if (sizes[i] > bcache->num_buckets)
1481590Srgrimes      {
1491590Srgrimes	new_num_buckets = sizes[i];
1501590Srgrimes	break;
1511590Srgrimes      }
1521590Srgrimes
1531590Srgrimes  /* Allocate the new table.  */
1541590Srgrimes  {
1551590Srgrimes    size_t new_size = new_num_buckets * sizeof (new_buckets[0]);
1561590Srgrimes    new_buckets = (struct bstring **) xmalloc (new_size);
1571590Srgrimes    memset (new_buckets, 0, new_size);
1581590Srgrimes
1591590Srgrimes    bcache->structure_size -= (bcache->num_buckets
1601590Srgrimes			       * sizeof (bcache->bucket[0]));
1611590Srgrimes    bcache->structure_size += new_size;
1621590Srgrimes  }
1631590Srgrimes
1641590Srgrimes  /* Rehash all existing strings.  */
1651590Srgrimes  for (i = 0; i < bcache->num_buckets; i++)
1661590Srgrimes    {
1671590Srgrimes      struct bstring *s, *next;
1681590Srgrimes
1691590Srgrimes      for (s = bcache->bucket[i]; s; s = next)
1701590Srgrimes	{
1711590Srgrimes	  struct bstring **new_bucket;
1721590Srgrimes	  next = s->next;
1731590Srgrimes
1741590Srgrimes	  new_bucket = &new_buckets[(hash (&s->d.data, s->length)
1751590Srgrimes				     % new_num_buckets)];
1761590Srgrimes	  s->next = *new_bucket;
1771590Srgrimes	  *new_bucket = s;
1781590Srgrimes	}
1791590Srgrimes    }
1801590Srgrimes
1811590Srgrimes  /* Plug in the new table.  */
1821590Srgrimes  if (bcache->bucket)
1831590Srgrimes    xfree (bcache->bucket);
1841590Srgrimes  bcache->bucket = new_buckets;
1851590Srgrimes  bcache->num_buckets = new_num_buckets;
1861590Srgrimes}
1871590Srgrimes
1881590Srgrimes
1891590Srgrimes/* Looking up things in the bcache.  */
1901590Srgrimes
1911590Srgrimes/* The number of bytes needed to allocate a struct bstring whose data
1921590Srgrimes   is N bytes long.  */
1931590Srgrimes#define BSTRING_SIZE(n) (offsetof (struct bstring, d.data) + (n))
1941590Srgrimes
1951590Srgrimes/* Find a copy of the LENGTH bytes at ADDR in BCACHE.  If BCACHE has
1961590Srgrimes   never seen those bytes before, add a copy of them to BCACHE.  In
1971590Srgrimes   either case, return a pointer to BCACHE's copy of that string.  */
1981590Srgrimesstatic void *
1991590Srgrimesbcache_data (const void *addr, int length, struct bcache *bcache)
2001590Srgrimes{
2011590Srgrimes  unsigned long full_hash;
2021590Srgrimes  unsigned short half_hash;
2031590Srgrimes  int hash_index;
2041590Srgrimes  struct bstring *s;
2051590Srgrimes
2061590Srgrimes  /* If our average chain length is too high, expand the hash table.  */
2071590Srgrimes  if (bcache->unique_count >= bcache->num_buckets * CHAIN_LENGTH_THRESHOLD)
2081590Srgrimes    expand_hash_table (bcache);
2091590Srgrimes
2101590Srgrimes  bcache->total_count++;
2111590Srgrimes  bcache->total_size += length;
2121590Srgrimes
2131590Srgrimes  full_hash = hash (addr, length);
2141590Srgrimes  half_hash = (full_hash >> 16);
2151590Srgrimes  hash_index = full_hash % bcache->num_buckets;
2161590Srgrimes
2171590Srgrimes  /* Search the hash bucket for a string identical to the caller's.
2181590Srgrimes     As a short-circuit first compare the upper part of each hash
2191590Srgrimes     values.  */
2201590Srgrimes  for (s = bcache->bucket[hash_index]; s; s = s->next)
2211590Srgrimes    {
2221590Srgrimes      if (s->half_hash == half_hash)
2231590Srgrimes	{
2241590Srgrimes	  if (s->length == length
2251590Srgrimes	      && ! memcmp (&s->d.data, addr, length))
2261590Srgrimes	    return &s->d.data;
2271590Srgrimes	  else
2281590Srgrimes	    bcache->half_hash_miss_count++;
2291590Srgrimes	}
2301590Srgrimes    }
2311590Srgrimes
2321590Srgrimes  /* The user's string isn't in the list.  Insert it after *ps.  */
2331590Srgrimes  {
2341590Srgrimes    struct bstring *new
2351590Srgrimes      = obstack_alloc (&bcache->cache, BSTRING_SIZE (length));
2361590Srgrimes    memcpy (&new->d.data, addr, length);
2371590Srgrimes    new->length = length;
2381590Srgrimes    new->next = bcache->bucket[hash_index];
2391590Srgrimes    new->half_hash = half_hash;
2401590Srgrimes    bcache->bucket[hash_index] = new;
2411590Srgrimes
2421590Srgrimes    bcache->unique_count++;
2431590Srgrimes    bcache->unique_size += length;
2441590Srgrimes    bcache->structure_size += BSTRING_SIZE (length);
2451590Srgrimes
2461590Srgrimes    return &new->d.data;
2471590Srgrimes  }
2481590Srgrimes}
2491590Srgrimes
2501590Srgrimesvoid *
2511590Srgrimesdeprecated_bcache (const void *addr, int length, struct bcache *bcache)
2521590Srgrimes{
2531590Srgrimes  return bcache_data (addr, length, bcache);
2541590Srgrimes}
2551590Srgrimes
2561590Srgrimesconst void *
2571590Srgrimesbcache (const void *addr, int length, struct bcache *bcache)
2581590Srgrimes{
2591590Srgrimes  return bcache_data (addr, length, bcache);
2601590Srgrimes}
2611590Srgrimes
2621590Srgrimes/* Allocating and freeing bcaches.  */
2631590Srgrimes
2641590Srgrimesstruct bcache *
2651590Srgrimesbcache_xmalloc (void)
2661590Srgrimes{
2671590Srgrimes  /* Allocate the bcache pre-zeroed.  */
2681590Srgrimes  struct bcache *b = XCALLOC (1, struct bcache);
2691590Srgrimes  /* We could use obstack_specify_allocation here instead, but
2701590Srgrimes     gdb_obstack.h specifies the allocation/deallocation
2711590Srgrimes     functions.  */
2721590Srgrimes  obstack_init (&b->cache);
2731590Srgrimes  return b;
2741590Srgrimes}
2751590Srgrimes
2761590Srgrimes/* Free all the storage associated with BCACHE.  */
2771590Srgrimesvoid
2781590Srgrimesbcache_xfree (struct bcache *bcache)
2791590Srgrimes{
2801590Srgrimes  if (bcache == NULL)
2811590Srgrimes    return;
2821590Srgrimes  obstack_free (&bcache->cache, 0);
2831590Srgrimes  xfree (bcache->bucket);
2841590Srgrimes  xfree (bcache);
2851590Srgrimes}
2861590Srgrimes
2871590Srgrimes
2881590Srgrimes
2891590Srgrimes/* Printing statistics.  */
2901590Srgrimes
2911590Srgrimesstatic int
2921590Srgrimescompare_ints (const void *ap, const void *bp)
2931590Srgrimes{
2941590Srgrimes  /* Because we know we're comparing two ints which are positive,
2951590Srgrimes     there's no danger of overflow here.  */
2961590Srgrimes  return * (int *) ap - * (int *) bp;
2971590Srgrimes}
2981590Srgrimes
2991590Srgrimes
3001590Srgrimesstatic void
3011590Srgrimesprint_percentage (int portion, int total)
3021590Srgrimes{
3031590Srgrimes  if (total == 0)
3041590Srgrimes    printf_filtered ("(not applicable)\n");
3051590Srgrimes  else
3061590Srgrimes    printf_filtered ("%3d%%\n", portion * 100 / total);
3071590Srgrimes}
3081590Srgrimes
3091590Srgrimes
3101590Srgrimes/* Print statistics on BCACHE's memory usage and efficacity at
3111590Srgrimes   eliminating duplication.  NAME should describe the kind of data
3121590Srgrimes   BCACHE holds.  Statistics are printed using `printf_filtered' and
3131590Srgrimes   its ilk.  */
3141590Srgrimesvoid
3151590Srgrimesprint_bcache_statistics (struct bcache *c, char *type)
3161590Srgrimes{
3171590Srgrimes  int occupied_buckets;
3181590Srgrimes  int max_chain_length;
3191590Srgrimes  int median_chain_length;
3201590Srgrimes  int max_entry_size;
3211590Srgrimes  int median_entry_size;
3221590Srgrimes
3231590Srgrimes  /* Count the number of occupied buckets, tally the various string
3241590Srgrimes     lengths, and measure chain lengths.  */
3251590Srgrimes  {
3261590Srgrimes    unsigned int b;
3271590Srgrimes    int *chain_length = XCALLOC (c->num_buckets + 1, int);
3281590Srgrimes    int *entry_size = XCALLOC (c->unique_count + 1, int);
3291590Srgrimes    int stringi = 0;
3301590Srgrimes
3311590Srgrimes    occupied_buckets = 0;
3321590Srgrimes
3331590Srgrimes    for (b = 0; b < c->num_buckets; b++)
3341590Srgrimes      {
3351590Srgrimes	struct bstring *s = c->bucket[b];
3361590Srgrimes
3371590Srgrimes	chain_length[b] = 0;
3381590Srgrimes
3391590Srgrimes	if (s)
3401590Srgrimes	  {
3411590Srgrimes	    occupied_buckets++;
3421590Srgrimes
3431590Srgrimes	    while (s)
3441590Srgrimes	      {
3451590Srgrimes		gdb_assert (b < c->num_buckets);
3461590Srgrimes		chain_length[b]++;
3471590Srgrimes		gdb_assert (stringi < c->unique_count);
3481590Srgrimes		entry_size[stringi++] = s->length;
3491590Srgrimes		s = s->next;
3501590Srgrimes	      }
3511590Srgrimes	  }
3521590Srgrimes      }
3531590Srgrimes
3541590Srgrimes    /* To compute the median, we need the set of chain lengths sorted.  */
3551590Srgrimes    qsort (chain_length, c->num_buckets, sizeof (chain_length[0]),
3561590Srgrimes	   compare_ints);
3571590Srgrimes    qsort (entry_size, c->unique_count, sizeof (entry_size[0]),
3581590Srgrimes	   compare_ints);
3591590Srgrimes
3601590Srgrimes    if (c->num_buckets > 0)
3611590Srgrimes      {
3621590Srgrimes	max_chain_length = chain_length[c->num_buckets - 1];
3631590Srgrimes	median_chain_length = chain_length[c->num_buckets / 2];
3641590Srgrimes      }
3651590Srgrimes    else
3661590Srgrimes      {
3671590Srgrimes	max_chain_length = 0;
3681590Srgrimes	median_chain_length = 0;
3691590Srgrimes      }
3701590Srgrimes    if (c->unique_count > 0)
3711590Srgrimes      {
3721590Srgrimes	max_entry_size = entry_size[c->unique_count - 1];
3731590Srgrimes	median_entry_size = entry_size[c->unique_count / 2];
3741590Srgrimes      }
3751590Srgrimes    else
3761590Srgrimes      {
3771590Srgrimes	max_entry_size = 0;
3781590Srgrimes	median_entry_size = 0;
3791590Srgrimes      }
3801590Srgrimes
3811590Srgrimes    xfree (chain_length);
3821590Srgrimes    xfree (entry_size);
3831590Srgrimes  }
3841590Srgrimes
3851590Srgrimes  printf_filtered ("  Cached '%s' statistics:\n", type);
3861590Srgrimes  printf_filtered ("    Total object count:  %ld\n", c->total_count);
3871590Srgrimes  printf_filtered ("    Unique object count: %lu\n", c->unique_count);
3881590Srgrimes  printf_filtered ("    Percentage of duplicates, by count: ");
3891590Srgrimes  print_percentage (c->total_count - c->unique_count, c->total_count);
3901590Srgrimes  printf_filtered ("\n");
3911590Srgrimes
3921590Srgrimes  printf_filtered ("    Total object size:   %ld\n", c->total_size);
3931590Srgrimes  printf_filtered ("    Unique object size:  %ld\n", c->unique_size);
3941590Srgrimes  printf_filtered ("    Percentage of duplicates, by size:  ");
3951590Srgrimes  print_percentage (c->total_size - c->unique_size, c->total_size);
3961590Srgrimes  printf_filtered ("\n");
3971590Srgrimes
3981590Srgrimes  printf_filtered ("    Max entry size:     %d\n", max_entry_size);
3991590Srgrimes  printf_filtered ("    Average entry size: ");
4001590Srgrimes  if (c->unique_count > 0)
4011590Srgrimes    printf_filtered ("%ld\n", c->unique_size / c->unique_count);
4021590Srgrimes  else
4031590Srgrimes    printf_filtered ("(not applicable)\n");
4041590Srgrimes  printf_filtered ("    Median entry size:  %d\n", median_entry_size);
4051590Srgrimes  printf_filtered ("\n");
4061590Srgrimes
4071590Srgrimes  printf_filtered ("    Total memory used by bcache, including overhead: %ld\n",
4081590Srgrimes		   c->structure_size);
4091590Srgrimes  printf_filtered ("    Percentage memory overhead: ");
4101590Srgrimes  print_percentage (c->structure_size - c->unique_size, c->unique_size);
4111590Srgrimes  printf_filtered ("    Net memory savings:         ");
4121590Srgrimes  print_percentage (c->total_size - c->structure_size, c->total_size);
4131590Srgrimes  printf_filtered ("\n");
4141590Srgrimes
4151590Srgrimes  printf_filtered ("    Hash table size:           %3d\n", c->num_buckets);
4161590Srgrimes  printf_filtered ("    Hash table expands:        %lu\n",
4171590Srgrimes		   c->expand_count);
4181590Srgrimes  printf_filtered ("    Hash table hashes:         %lu\n",
4191590Srgrimes		   c->total_count + c->expand_hash_count);
4201590Srgrimes  printf_filtered ("    Half hash misses:          %lu\n",
4211590Srgrimes		   c->half_hash_miss_count);
4221590Srgrimes  printf_filtered ("    Hash table population:     ");
4231590Srgrimes  print_percentage (occupied_buckets, c->num_buckets);
4241590Srgrimes  printf_filtered ("    Median hash chain length:  %3d\n",
4251590Srgrimes		   median_chain_length);
4261590Srgrimes  printf_filtered ("    Average hash chain length: ");
4271590Srgrimes  if (c->num_buckets > 0)
4281590Srgrimes    printf_filtered ("%3lu\n", c->unique_count / c->num_buckets);
4291590Srgrimes  else
4301590Srgrimes    printf_filtered ("(not applicable)\n");
4311590Srgrimes  printf_filtered ("    Maximum hash chain length: %3d\n", max_chain_length);
4321590Srgrimes  printf_filtered ("\n");
4331590Srgrimes}
4341590Srgrimes
4351590Srgrimesint
4361590Srgrimesbcache_memory_used (struct bcache *bcache)
4371590Srgrimes{
4381590Srgrimes  return obstack_memory_used (&bcache->cache);
4391590Srgrimes}
4401590Srgrimes