119370Spst/* Implement a cached obstack.
298944Sobrien   Written by Fred Fish <fnf@cygnus.com>
398944Sobrien   Rewritten by Jim Blandy <jimb@cygnus.com>
419370Spst
5130803Smarcel   Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
619370Spst
798944Sobrien   This file is part of GDB.
819370Spst
998944Sobrien   This program is free software; you can redistribute it and/or modify
1098944Sobrien   it under the terms of the GNU General Public License as published by
1198944Sobrien   the Free Software Foundation; either version 2 of the License, or
1298944Sobrien   (at your option) any later version.
1319370Spst
1498944Sobrien   This program is distributed in the hope that it will be useful,
1598944Sobrien   but WITHOUT ANY WARRANTY; without even the implied warranty of
1698944Sobrien   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1798944Sobrien   GNU General Public License for more details.
1819370Spst
1998944Sobrien   You should have received a copy of the GNU General Public License
2098944Sobrien   along with this program; if not, write to the Free Software
2198944Sobrien   Foundation, Inc., 59 Temple Place - Suite 330,
2298944Sobrien   Boston, MA 02111-1307, USA.  */
2398944Sobrien
2419370Spst#include "defs.h"
25130803Smarcel#include "gdb_obstack.h"
2619370Spst#include "bcache.h"
2719370Spst#include "gdb_string.h"		/* For memcpy declaration */
28130803Smarcel#include "gdb_assert.h"
2919370Spst
3098944Sobrien#include <stddef.h>
3198944Sobrien#include <stdlib.h>
3246283Sdfr
33130803Smarcel/* The type used to hold a single bcache string.  The user data is
34130803Smarcel   stored in d.data.  Since it can be any type, it needs to have the
35130803Smarcel   same alignment as the most strict alignment of any type on the host
36130803Smarcel   machine.  I don't know of any really correct way to do this in
37130803Smarcel   stock ANSI C, so just do it the same way obstack.h does.  */
38130803Smarcel
39130803Smarcelstruct bstring
40130803Smarcel{
41130803Smarcel  /* Hash chain.  */
42130803Smarcel  struct bstring *next;
43130803Smarcel  /* Assume the data length is no more than 64k.  */
44130803Smarcel  unsigned short length;
45130803Smarcel  /* The half hash hack.  This contains the upper 16 bits of the hash
46130803Smarcel     value and is used as a pre-check when comparing two strings and
47130803Smarcel     avoids the need to do length or memcmp calls.  It proves to be
48130803Smarcel     roughly 100% effective.  */
49130803Smarcel  unsigned short half_hash;
50130803Smarcel
51130803Smarcel  union
52130803Smarcel  {
53130803Smarcel    char data[1];
54130803Smarcel    double dummy;
55130803Smarcel  }
56130803Smarcel  d;
57130803Smarcel};
58130803Smarcel
59130803Smarcel
60130803Smarcel/* The structure for a bcache itself.  The bcache is initialized, in
61130803Smarcel   bcache_xmalloc(), by filling it with zeros and then setting the
62130803Smarcel   corresponding obstack's malloc() and free() methods.  */
63130803Smarcel
64130803Smarcelstruct bcache
65130803Smarcel{
66130803Smarcel  /* All the bstrings are allocated here.  */
67130803Smarcel  struct obstack cache;
68130803Smarcel
69130803Smarcel  /* How many hash buckets we're using.  */
70130803Smarcel  unsigned int num_buckets;
71130803Smarcel
72130803Smarcel  /* Hash buckets.  This table is allocated using malloc, so when we
73130803Smarcel     grow the table we can return the old table to the system.  */
74130803Smarcel  struct bstring **bucket;
75130803Smarcel
76130803Smarcel  /* Statistics.  */
77130803Smarcel  unsigned long unique_count;	/* number of unique strings */
78130803Smarcel  long total_count;	/* total number of strings cached, including dups */
79130803Smarcel  long unique_size;	/* size of unique strings, in bytes */
80130803Smarcel  long total_size;      /* total number of bytes cached, including dups */
81130803Smarcel  long structure_size;	/* total size of bcache, including infrastructure */
82130803Smarcel  /* Number of times that the hash table is expanded and hence
83130803Smarcel     re-built, and the corresponding number of times that a string is
84130803Smarcel     [re]hashed as part of entering it into the expanded table.  The
85130803Smarcel     total number of hashes can be computed by adding TOTAL_COUNT to
86130803Smarcel     expand_hash_count.  */
87130803Smarcel  unsigned long expand_count;
88130803Smarcel  unsigned long expand_hash_count;
89130803Smarcel  /* Number of times that the half-hash compare hit (compare the upper
90130803Smarcel     16 bits of hash values) hit, but the corresponding combined
91130803Smarcel     length/data compare missed.  */
92130803Smarcel  unsigned long half_hash_miss_count;
93130803Smarcel};
94130803Smarcel
9598944Sobrien/* The old hash function was stolen from SDBM. This is what DB 3.0 uses now,
9698944Sobrien * and is better than the old one.
9798944Sobrien */
9898944Sobrien
9998944Sobrienunsigned long
10098944Sobrienhash(const void *addr, int length)
10198944Sobrien{
10298944Sobrien		const unsigned char *k, *e;
10398944Sobrien		unsigned long h;
10498944Sobrien
10598944Sobrien		k = (const unsigned char *)addr;
10698944Sobrien		e = k+length;
10798944Sobrien		for (h=0; k< e;++k)
10898944Sobrien		{
10998944Sobrien				h *=16777619;
11098944Sobrien				h ^= *k;
11198944Sobrien		}
11298944Sobrien		return (h);
11398944Sobrien}
11498944Sobrien
11598944Sobrien/* Growing the bcache's hash table.  */
11646283Sdfr
11798944Sobrien/* If the average chain length grows beyond this, then we want to
11898944Sobrien   resize our hash table.  */
11998944Sobrien#define CHAIN_LENGTH_THRESHOLD (5)
12046283Sdfr
12198944Sobrienstatic void
12298944Sobrienexpand_hash_table (struct bcache *bcache)
12319370Spst{
12498944Sobrien  /* A table of good hash table sizes.  Whenever we grow, we pick the
12598944Sobrien     next larger size from this table.  sizes[i] is close to 1 << (i+10),
12698944Sobrien     so we roughly double the table size each time.  After we fall off
12798944Sobrien     the end of this table, we just double.  Don't laugh --- there have
12898944Sobrien     been executables sighted with a gigabyte of debug info.  */
12998944Sobrien  static unsigned long sizes[] = {
13098944Sobrien    1021, 2053, 4099, 8191, 16381, 32771,
13198944Sobrien    65537, 131071, 262144, 524287, 1048573, 2097143,
13298944Sobrien    4194301, 8388617, 16777213, 33554467, 67108859, 134217757,
13398944Sobrien    268435459, 536870923, 1073741827, 2147483659UL
13498944Sobrien  };
13598944Sobrien  unsigned int new_num_buckets;
13698944Sobrien  struct bstring **new_buckets;
13798944Sobrien  unsigned int i;
13819370Spst
139130803Smarcel  /* Count the stats.  Every unique item needs to be re-hashed and
140130803Smarcel     re-entered.  */
141130803Smarcel  bcache->expand_count++;
142130803Smarcel  bcache->expand_hash_count += bcache->unique_count;
143130803Smarcel
14498944Sobrien  /* Find the next size.  */
14598944Sobrien  new_num_buckets = bcache->num_buckets * 2;
14698944Sobrien  for (i = 0; i < (sizeof (sizes) / sizeof (sizes[0])); i++)
14798944Sobrien    if (sizes[i] > bcache->num_buckets)
14898944Sobrien      {
14998944Sobrien	new_num_buckets = sizes[i];
15098944Sobrien	break;
15198944Sobrien      }
15219370Spst
15398944Sobrien  /* Allocate the new table.  */
15498944Sobrien  {
15598944Sobrien    size_t new_size = new_num_buckets * sizeof (new_buckets[0]);
15698944Sobrien    new_buckets = (struct bstring **) xmalloc (new_size);
15798944Sobrien    memset (new_buckets, 0, new_size);
15819370Spst
15998944Sobrien    bcache->structure_size -= (bcache->num_buckets
16098944Sobrien			       * sizeof (bcache->bucket[0]));
16198944Sobrien    bcache->structure_size += new_size;
16298944Sobrien  }
16398944Sobrien
16498944Sobrien  /* Rehash all existing strings.  */
16598944Sobrien  for (i = 0; i < bcache->num_buckets; i++)
16619370Spst    {
16798944Sobrien      struct bstring *s, *next;
16898944Sobrien
16998944Sobrien      for (s = bcache->bucket[i]; s; s = next)
17019370Spst	{
17198944Sobrien	  struct bstring **new_bucket;
17298944Sobrien	  next = s->next;
17398944Sobrien
17498944Sobrien	  new_bucket = &new_buckets[(hash (&s->d.data, s->length)
17598944Sobrien				     % new_num_buckets)];
17698944Sobrien	  s->next = *new_bucket;
17798944Sobrien	  *new_bucket = s;
17819370Spst	}
17919370Spst    }
18098944Sobrien
18198944Sobrien  /* Plug in the new table.  */
18298944Sobrien  if (bcache->bucket)
18398944Sobrien    xfree (bcache->bucket);
18498944Sobrien  bcache->bucket = new_buckets;
18598944Sobrien  bcache->num_buckets = new_num_buckets;
18619370Spst}
18719370Spst
18898944Sobrien
18998944Sobrien/* Looking up things in the bcache.  */
19098944Sobrien
19198944Sobrien/* The number of bytes needed to allocate a struct bstring whose data
19298944Sobrien   is N bytes long.  */
19398944Sobrien#define BSTRING_SIZE(n) (offsetof (struct bstring, d.data) + (n))
19498944Sobrien
19598944Sobrien/* Find a copy of the LENGTH bytes at ADDR in BCACHE.  If BCACHE has
19698944Sobrien   never seen those bytes before, add a copy of them to BCACHE.  In
19798944Sobrien   either case, return a pointer to BCACHE's copy of that string.  */
198130803Smarcelstatic void *
199130803Smarcelbcache_data (const void *addr, int length, struct bcache *bcache)
20019370Spst{
201130803Smarcel  unsigned long full_hash;
202130803Smarcel  unsigned short half_hash;
20398944Sobrien  int hash_index;
20498944Sobrien  struct bstring *s;
20519370Spst
20698944Sobrien  /* If our average chain length is too high, expand the hash table.  */
20798944Sobrien  if (bcache->unique_count >= bcache->num_buckets * CHAIN_LENGTH_THRESHOLD)
20898944Sobrien    expand_hash_table (bcache);
20998944Sobrien
21098944Sobrien  bcache->total_count++;
21198944Sobrien  bcache->total_size += length;
21298944Sobrien
213130803Smarcel  full_hash = hash (addr, length);
214130803Smarcel  half_hash = (full_hash >> 16);
215130803Smarcel  hash_index = full_hash % bcache->num_buckets;
21698944Sobrien
217130803Smarcel  /* Search the hash bucket for a string identical to the caller's.
218130803Smarcel     As a short-circuit first compare the upper part of each hash
219130803Smarcel     values.  */
22098944Sobrien  for (s = bcache->bucket[hash_index]; s; s = s->next)
221130803Smarcel    {
222130803Smarcel      if (s->half_hash == half_hash)
223130803Smarcel	{
224130803Smarcel	  if (s->length == length
225130803Smarcel	      && ! memcmp (&s->d.data, addr, length))
226130803Smarcel	    return &s->d.data;
227130803Smarcel	  else
228130803Smarcel	    bcache->half_hash_miss_count++;
229130803Smarcel	}
230130803Smarcel    }
23198944Sobrien
23298944Sobrien  /* The user's string isn't in the list.  Insert it after *ps.  */
23398944Sobrien  {
23498944Sobrien    struct bstring *new
23598944Sobrien      = obstack_alloc (&bcache->cache, BSTRING_SIZE (length));
23698944Sobrien    memcpy (&new->d.data, addr, length);
23798944Sobrien    new->length = length;
23898944Sobrien    new->next = bcache->bucket[hash_index];
239130803Smarcel    new->half_hash = half_hash;
24098944Sobrien    bcache->bucket[hash_index] = new;
24198944Sobrien
24298944Sobrien    bcache->unique_count++;
24398944Sobrien    bcache->unique_size += length;
24498944Sobrien    bcache->structure_size += BSTRING_SIZE (length);
24598944Sobrien
24698944Sobrien    return &new->d.data;
24798944Sobrien  }
24898944Sobrien}
24998944Sobrien
250130803Smarcelvoid *
251130803Smarceldeprecated_bcache (const void *addr, int length, struct bcache *bcache)
252130803Smarcel{
253130803Smarcel  return bcache_data (addr, length, bcache);
254130803Smarcel}
255130803Smarcel
256130803Smarcelconst void *
257130803Smarcelbcache (const void *addr, int length, struct bcache *bcache)
258130803Smarcel{
259130803Smarcel  return bcache_data (addr, length, bcache);
260130803Smarcel}
26198944Sobrien
262130803Smarcel/* Allocating and freeing bcaches.  */
26398944Sobrien
264130803Smarcelstruct bcache *
265130803Smarcelbcache_xmalloc (void)
266130803Smarcel{
267130803Smarcel  /* Allocate the bcache pre-zeroed.  */
268130803Smarcel  struct bcache *b = XCALLOC (1, struct bcache);
269130803Smarcel  /* We could use obstack_specify_allocation here instead, but
270130803Smarcel     gdb_obstack.h specifies the allocation/deallocation
271130803Smarcel     functions.  */
272130803Smarcel  obstack_init (&b->cache);
273130803Smarcel  return b;
274130803Smarcel}
275130803Smarcel
27698944Sobrien/* Free all the storage associated with BCACHE.  */
27798944Sobrienvoid
278130803Smarcelbcache_xfree (struct bcache *bcache)
27998944Sobrien{
280130803Smarcel  if (bcache == NULL)
281130803Smarcel    return;
28298944Sobrien  obstack_free (&bcache->cache, 0);
283130803Smarcel  xfree (bcache->bucket);
284130803Smarcel  xfree (bcache);
28598944Sobrien}
28698944Sobrien
28798944Sobrien
28898944Sobrien
28998944Sobrien/* Printing statistics.  */
29098944Sobrien
29198944Sobrienstatic int
29298944Sobriencompare_ints (const void *ap, const void *bp)
29398944Sobrien{
29498944Sobrien  /* Because we know we're comparing two ints which are positive,
29598944Sobrien     there's no danger of overflow here.  */
29698944Sobrien  return * (int *) ap - * (int *) bp;
29798944Sobrien}
29898944Sobrien
29998944Sobrien
30098944Sobrienstatic void
30198944Sobrienprint_percentage (int portion, int total)
30298944Sobrien{
30398944Sobrien  if (total == 0)
30498944Sobrien    printf_filtered ("(not applicable)\n");
30519370Spst  else
30698944Sobrien    printf_filtered ("%3d%%\n", portion * 100 / total);
30719370Spst}
30819370Spst
30919370Spst
31098944Sobrien/* Print statistics on BCACHE's memory usage and efficacity at
31198944Sobrien   eliminating duplication.  NAME should describe the kind of data
31298944Sobrien   BCACHE holds.  Statistics are printed using `printf_filtered' and
31398944Sobrien   its ilk.  */
31419370Spstvoid
31598944Sobrienprint_bcache_statistics (struct bcache *c, char *type)
31619370Spst{
31798944Sobrien  int occupied_buckets;
31898944Sobrien  int max_chain_length;
31998944Sobrien  int median_chain_length;
320130803Smarcel  int max_entry_size;
321130803Smarcel  int median_entry_size;
32219370Spst
323130803Smarcel  /* Count the number of occupied buckets, tally the various string
324130803Smarcel     lengths, and measure chain lengths.  */
32598944Sobrien  {
32698944Sobrien    unsigned int b;
327130803Smarcel    int *chain_length = XCALLOC (c->num_buckets + 1, int);
328130803Smarcel    int *entry_size = XCALLOC (c->unique_count + 1, int);
329130803Smarcel    int stringi = 0;
33098944Sobrien
33198944Sobrien    occupied_buckets = 0;
33298944Sobrien
33398944Sobrien    for (b = 0; b < c->num_buckets; b++)
33498944Sobrien      {
33598944Sobrien	struct bstring *s = c->bucket[b];
33698944Sobrien
33798944Sobrien	chain_length[b] = 0;
33898944Sobrien
33998944Sobrien	if (s)
34098944Sobrien	  {
34198944Sobrien	    occupied_buckets++;
34298944Sobrien
34398944Sobrien	    while (s)
34498944Sobrien	      {
345130803Smarcel		gdb_assert (b < c->num_buckets);
34698944Sobrien		chain_length[b]++;
347130803Smarcel		gdb_assert (stringi < c->unique_count);
348130803Smarcel		entry_size[stringi++] = s->length;
34998944Sobrien		s = s->next;
35098944Sobrien	      }
35198944Sobrien	  }
35298944Sobrien      }
35398944Sobrien
35498944Sobrien    /* To compute the median, we need the set of chain lengths sorted.  */
35598944Sobrien    qsort (chain_length, c->num_buckets, sizeof (chain_length[0]),
35698944Sobrien	   compare_ints);
357130803Smarcel    qsort (entry_size, c->unique_count, sizeof (entry_size[0]),
358130803Smarcel	   compare_ints);
35998944Sobrien
36098944Sobrien    if (c->num_buckets > 0)
36198944Sobrien      {
36298944Sobrien	max_chain_length = chain_length[c->num_buckets - 1];
36398944Sobrien	median_chain_length = chain_length[c->num_buckets / 2];
36498944Sobrien      }
36598944Sobrien    else
36698944Sobrien      {
36798944Sobrien	max_chain_length = 0;
36898944Sobrien	median_chain_length = 0;
36998944Sobrien      }
370130803Smarcel    if (c->unique_count > 0)
371130803Smarcel      {
372130803Smarcel	max_entry_size = entry_size[c->unique_count - 1];
373130803Smarcel	median_entry_size = entry_size[c->unique_count / 2];
374130803Smarcel      }
375130803Smarcel    else
376130803Smarcel      {
377130803Smarcel	max_entry_size = 0;
378130803Smarcel	median_entry_size = 0;
379130803Smarcel      }
380130803Smarcel
381130803Smarcel    xfree (chain_length);
382130803Smarcel    xfree (entry_size);
38398944Sobrien  }
38498944Sobrien
38598944Sobrien  printf_filtered ("  Cached '%s' statistics:\n", type);
38698944Sobrien  printf_filtered ("    Total object count:  %ld\n", c->total_count);
38798944Sobrien  printf_filtered ("    Unique object count: %lu\n", c->unique_count);
38898944Sobrien  printf_filtered ("    Percentage of duplicates, by count: ");
38998944Sobrien  print_percentage (c->total_count - c->unique_count, c->total_count);
39098944Sobrien  printf_filtered ("\n");
39198944Sobrien
39298944Sobrien  printf_filtered ("    Total object size:   %ld\n", c->total_size);
39398944Sobrien  printf_filtered ("    Unique object size:  %ld\n", c->unique_size);
39498944Sobrien  printf_filtered ("    Percentage of duplicates, by size:  ");
39598944Sobrien  print_percentage (c->total_size - c->unique_size, c->total_size);
39698944Sobrien  printf_filtered ("\n");
39798944Sobrien
398130803Smarcel  printf_filtered ("    Max entry size:     %d\n", max_entry_size);
399130803Smarcel  printf_filtered ("    Average entry size: ");
400130803Smarcel  if (c->unique_count > 0)
401130803Smarcel    printf_filtered ("%ld\n", c->unique_size / c->unique_count);
402130803Smarcel  else
403130803Smarcel    printf_filtered ("(not applicable)\n");
404130803Smarcel  printf_filtered ("    Median entry size:  %d\n", median_entry_size);
405130803Smarcel  printf_filtered ("\n");
406130803Smarcel
40798944Sobrien  printf_filtered ("    Total memory used by bcache, including overhead: %ld\n",
40898944Sobrien		   c->structure_size);
40998944Sobrien  printf_filtered ("    Percentage memory overhead: ");
41098944Sobrien  print_percentage (c->structure_size - c->unique_size, c->unique_size);
41198944Sobrien  printf_filtered ("    Net memory savings:         ");
41298944Sobrien  print_percentage (c->total_size - c->structure_size, c->total_size);
41398944Sobrien  printf_filtered ("\n");
41498944Sobrien
41598944Sobrien  printf_filtered ("    Hash table size:           %3d\n", c->num_buckets);
416130803Smarcel  printf_filtered ("    Hash table expands:        %lu\n",
417130803Smarcel		   c->expand_count);
418130803Smarcel  printf_filtered ("    Hash table hashes:         %lu\n",
419130803Smarcel		   c->total_count + c->expand_hash_count);
420130803Smarcel  printf_filtered ("    Half hash misses:          %lu\n",
421130803Smarcel		   c->half_hash_miss_count);
42298944Sobrien  printf_filtered ("    Hash table population:     ");
42398944Sobrien  print_percentage (occupied_buckets, c->num_buckets);
42498944Sobrien  printf_filtered ("    Median hash chain length:  %3d\n",
42598944Sobrien		   median_chain_length);
42698944Sobrien  printf_filtered ("    Average hash chain length: ");
42798944Sobrien  if (c->num_buckets > 0)
42898944Sobrien    printf_filtered ("%3lu\n", c->unique_count / c->num_buckets);
42946283Sdfr  else
43098944Sobrien    printf_filtered ("(not applicable)\n");
43198944Sobrien  printf_filtered ("    Maximum hash chain length: %3d\n", max_chain_length);
43298944Sobrien  printf_filtered ("\n");
43319370Spst}
434130803Smarcel
435130803Smarcelint
436130803Smarcelbcache_memory_used (struct bcache *bcache)
437130803Smarcel{
438130803Smarcel  return obstack_memory_used (&bcache->cache);
439130803Smarcel}
440