hash.c revision 33965
133965Sjdp/* hash.c - hash table lookup strings -
233965Sjdp   Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 1996
333965Sjdp   Free Software Foundation, Inc.
433965Sjdp
533965Sjdp   This file is part of GAS, the GNU Assembler.
633965Sjdp
733965Sjdp   GAS is free software; you can redistribute it and/or modify
833965Sjdp   it under the terms of the GNU General Public License as published by
933965Sjdp   the Free Software Foundation; either version 2, or (at your option)
1033965Sjdp   any later version.
1133965Sjdp
1233965Sjdp   GAS is distributed in the hope that it will be useful,
1333965Sjdp   but WITHOUT ANY WARRANTY; without even the implied warranty of
1433965Sjdp   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1533965Sjdp   GNU General Public License for more details.
1633965Sjdp
1733965Sjdp   You should have received a copy of the GNU General Public License
1833965Sjdp   along with GAS; see the file COPYING.  If not, write to
1933965Sjdp   the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
2033965Sjdp
2133965Sjdp/*
2233965Sjdp * BUGS, GRIPES, APOLOGIA etc.
2333965Sjdp *
2433965Sjdp * A typical user doesn't need ALL this: I intend to make a library out
2533965Sjdp * of it one day - Dean Elsner.
2633965Sjdp * Also, I want to change the definition of a symbol to (address,length)
2733965Sjdp * so I can put arbitrary binary in the names stored. [see hsh.c for that]
2833965Sjdp *
2933965Sjdp * This slime is common coupled inside the module. Com-coupling (and other
3033965Sjdp * vandalism) was done to speed running time. The interfaces at the
3133965Sjdp * module's edges are adequately clean.
3233965Sjdp *
3333965Sjdp * There is no way to (a) run a test script through this heap and (b)
3433965Sjdp * compare results with previous scripts, to see if we have broken any
3533965Sjdp * code. Use GNU (f)utilities to do this. A few commands assist test.
3633965Sjdp * The testing is awkward: it tries to be both batch & interactive.
3733965Sjdp * For now, interactive rules!
3833965Sjdp */
3933965Sjdp
4033965Sjdp/*
4133965Sjdp *  The idea is to implement a symbol table. A test jig is here.
4233965Sjdp *  Symbols are arbitrary strings; they can't contain '\0'.
4333965Sjdp *	[See hsh.c for a more general symbol flavour.]
4433965Sjdp *  Each symbol is associated with a char*, which can point to anything
4533965Sjdp *  you want, allowing an arbitrary property list for each symbol.
4633965Sjdp *
4733965Sjdp *  The basic operations are:
4833965Sjdp *
4933965Sjdp *    new                     creates symbol table, returns handle
5033965Sjdp *    find (symbol)           returns char*
5133965Sjdp *    insert (symbol,char*)   error if symbol already in table
5233965Sjdp *    delete (symbol)         returns char* if symbol was in table
5333965Sjdp *    apply                   so you can delete all symbols before die()
5433965Sjdp *    die                     destroy symbol table (free up memory)
5533965Sjdp *
5633965Sjdp *  Supplementary functions include:
5733965Sjdp *
5833965Sjdp *    say                     how big? what % full?
5933965Sjdp *    replace (symbol,newval) report previous value
6033965Sjdp *    jam (symbol,value)      assert symbol:=value
6133965Sjdp *
6233965Sjdp *  You, the caller, have control over errors: this just reports them.
6333965Sjdp *
6433965Sjdp *  This package requires malloc(), free().
6533965Sjdp *  Malloc(size) returns NULL or address of char[size].
6633965Sjdp *  Free(address) frees same.
6733965Sjdp */
6833965Sjdp
6933965Sjdp/*
7033965Sjdp *  The code and its structures are re-enterent.
7133965Sjdp *
7233965Sjdp *  Before you do anything else, you must call hash_new() which will
7333965Sjdp *  return the address of a hash-table-control-block.  You then use
7433965Sjdp *  this address as a handle of the symbol table by passing it to all
7533965Sjdp *  the other hash_...() functions.  The only approved way to recover
7633965Sjdp *  the memory used by the symbol table is to call hash_die() with the
7733965Sjdp *  handle of the symbol table.
7833965Sjdp *
7933965Sjdp *  Before you call hash_die() you normally delete anything pointed to
8033965Sjdp *  by individual symbols. After hash_die() you can't use that symbol
8133965Sjdp *  table again.
8233965Sjdp *
8333965Sjdp *  The char* you associate with a symbol may not be NULL (0) because
8433965Sjdp *  NULL is returned whenever a symbol is not in the table. Any other
8533965Sjdp *  value is OK, except DELETED, #defined below.
8633965Sjdp *
8733965Sjdp *  When you supply a symbol string for insertion, YOU MUST PRESERVE THE
8833965Sjdp *  STRING until that symbol is deleted from the table. The reason is that
8933965Sjdp *  only the address you supply, NOT the symbol string itself, is stored
9033965Sjdp *  in the symbol table.
9133965Sjdp *
9233965Sjdp *  You may delete and add symbols arbitrarily.
9333965Sjdp *  Any or all symbols may have the same 'value' (char *). In fact, these
9433965Sjdp *  routines don't do anything with your symbol values.
9533965Sjdp *
9633965Sjdp *  You have no right to know where the symbol:char* mapping is stored,
9733965Sjdp *  because it moves around in memory; also because we may change how it
9833965Sjdp *  works and we don't want to break your code do we? However the handle
9933965Sjdp *  (address of struct hash_control) is never changed in
10033965Sjdp *  the life of the symbol table.
10133965Sjdp *
10233965Sjdp *  What you CAN find out about a symbol table is:
10333965Sjdp *    how many slots are in the hash table?
10433965Sjdp *    how many slots are filled with symbols?
10533965Sjdp *    (total hashes,collisions) for (reads,writes) (*)
10633965Sjdp *  All of the above values vary in time.
10733965Sjdp *  (*) some of these numbers will not be meaningful if we change the
10833965Sjdp *  internals. */
10933965Sjdp
11033965Sjdp/*
11133965Sjdp *  I N T E R N A L
11233965Sjdp *
11333965Sjdp *  Hash table is an array of hash_entries; each entry is a pointer to a
11433965Sjdp *  a string and a user-supplied value 1 char* wide.
11533965Sjdp *
11633965Sjdp *  The array always has 2 ** n elements, n>0, n integer.
11733965Sjdp *  There is also a 'wall' entry after the array, which is always empty
11833965Sjdp *  and acts as a sentinel to stop running off the end of the array.
11933965Sjdp *  When the array gets too full, we create a new array twice as large
12033965Sjdp *  and re-hash the symbols into the new array, then forget the old array.
12133965Sjdp *  (Of course, we copy the values into the new array before we junk the
12233965Sjdp *  old array!)
12333965Sjdp *
12433965Sjdp */
12533965Sjdp
12633965Sjdp#include <stdio.h>
12733965Sjdp
12833965Sjdp#ifndef FALSE
12933965Sjdp#define FALSE	(0)
13033965Sjdp#define TRUE	(!FALSE)
13133965Sjdp#endif /* no FALSE yet */
13233965Sjdp
13333965Sjdp#include <ctype.h>
13433965Sjdp#define min(a, b)	((a) < (b) ? (a) : (b))
13533965Sjdp
13633965Sjdp#include "as.h"
13733965Sjdp
13833965Sjdp#define error	as_fatal
13933965Sjdp
14033965Sjdpstatic char _deleted_[1];
14133965Sjdp#define DELETED     ((PTR)_deleted_)	/* guarenteed unique address */
14233965Sjdp#define START_POWER    (10)	/* power of two: size of new hash table */
14333965Sjdp
14433965Sjdp/* TRUE if a symbol is in entry @ ptr.  */
14533965Sjdp#define islive(ptr) (ptr->hash_string && ptr->hash_string!=DELETED)
14633965Sjdp
14733965Sjdpenum stat_enum {
14833965Sjdp  /* Number of slots in hash table.  The wall does not count here.
14933965Sjdp     We expect this is always a power of 2.  */
15033965Sjdp  STAT_SIZE = 0,
15133965Sjdp  /* Number of hash_ask calls.  */
15233965Sjdp  STAT_ACCESS,
15333965Sjdp  STAT_ACCESS_w,
15433965Sjdp  /* Number of collisions (total).  This may exceed STAT_ACCESS if we
15533965Sjdp     have lots of collisions/access.  */
15633965Sjdp  STAT_COLLIDE,
15733965Sjdp  STAT_COLLIDE_w,
15833965Sjdp  /* Slots used right now.  */
15933965Sjdp  STAT_USED,
16033965Sjdp  /* How many string compares?  */
16133965Sjdp  STAT_STRCMP,
16233965Sjdp  STAT_STRCMP_w,
16333965Sjdp  /* Size of statistics block... this must be last.  */
16433965Sjdp  STATLENGTH
16533965Sjdp};
16633965Sjdp#define STAT__READ     (0)	/* reading */
16733965Sjdp#define STAT__WRITE    (1)	/* writing */
16833965Sjdp
16933965Sjdp/* When we grow a hash table, by what power of two do we increase it?  */
17033965Sjdp#define GROW_FACTOR 1
17133965Sjdp/* When should we grow it?  */
17233965Sjdp#define FULL_VALUE(N)	((N) / 2)
17333965Sjdp
17433965Sjdp/* #define SUSPECT to do runtime checks */
17533965Sjdp/* #define TEST to be a test jig for hash...() */
17633965Sjdp
17733965Sjdp#ifdef TEST
17833965Sjdp/* TEST: use smaller hash table */
17933965Sjdp#undef  START_POWER
18033965Sjdp#define START_POWER (3)
18133965Sjdp#undef  START_SIZE
18233965Sjdp#define START_SIZE  (8)
18333965Sjdp#undef  START_FULL
18433965Sjdp#define START_FULL  (4)
18533965Sjdp#endif
18633965Sjdp
18733965Sjdpstruct hash_entry
18833965Sjdp{
18933965Sjdp  const char *hash_string;	/* points to where the symbol string is */
19033965Sjdp  /* NULL means slot is not used */
19133965Sjdp  /* DELETED means slot was deleted */
19233965Sjdp  PTR hash_value;		/* user's datum, associated with symbol */
19333965Sjdp  unsigned long h;
19433965Sjdp};
19533965Sjdp
19633965Sjdpstruct hash_control {
19733965Sjdp  struct hash_entry *hash_where;/* address of hash table */
19833965Sjdp  int hash_sizelog;		/* Log of ( hash_mask + 1 ) */
19933965Sjdp  int hash_mask;		/* masks a hash into index into table */
20033965Sjdp  int hash_full;		/* when hash_stat[STAT_USED] exceeds this, */
20133965Sjdp  /* grow table */
20233965Sjdp  struct hash_entry *hash_wall;	/* point just after last (usable) entry */
20333965Sjdp  /* here we have some statistics */
20433965Sjdp  int hash_stat[STATLENGTH];	/* lies & statistics */
20533965Sjdp};
20633965Sjdp
20733965Sjdp/*------------------ plan ---------------------------------- i = internal
20833965Sjdp
20933965Sjdp  struct hash_control * c;
21033965Sjdp  struct hash_entry   * e;                                                    i
21133965Sjdp  int                   b[z];     buffer for statistics
21233965Sjdp  z         size of b
21333965Sjdp  char                * s;        symbol string (address) [ key ]
21433965Sjdp  char                * v;        value string (address)  [datum]
21533965Sjdp  boolean               f;        TRUE if we found s in hash table            i
21633965Sjdp  char                * t;        error string; 0 means OK
21733965Sjdp  int                   a;        access type [0...n)                         i
21833965Sjdp
21933965Sjdp  c=hash_new       ()             create new hash_control
22033965Sjdp
22133965Sjdp  hash_die         (c)            destroy hash_control (and hash table)
22233965Sjdp  table should be empty.
22333965Sjdp  doesn't check if table is empty.
22433965Sjdp  c has no meaning after this.
22533965Sjdp
22633965Sjdp  hash_say         (c,b,z)        report statistics of hash_control.
22733965Sjdp  also report number of available statistics.
22833965Sjdp
22933965Sjdp  v=hash_delete    (c,s)          delete symbol, return old value if any.
23033965Sjdp  ask()                       NULL means no old value.
23133965Sjdp  f
23233965Sjdp
23333965Sjdp  v=hash_replace   (c,s,v)        replace old value of s with v.
23433965Sjdp  ask()                       NULL means no old value: no table change.
23533965Sjdp  f
23633965Sjdp
23733965Sjdp  t=hash_insert    (c,s,v)        insert (s,v) in c.
23833965Sjdp  ask()                       return error string.
23933965Sjdp  f                           it is an error to insert if s is already
24033965Sjdp  in table.
24133965Sjdp  if any error, c is unchanged.
24233965Sjdp
24333965Sjdp  t=hash_jam       (c,s,v)        assert that new value of s will be v.       i
24433965Sjdp  ask()                       it may decide to GROW the table.            i
24533965Sjdp  f                                                                       i
24633965Sjdp  grow()                                                                  i
24733965Sjdp  t=hash_grow      (c)            grow the hash table.                        i
24833965Sjdp  jam()                       will invoke JAM.                            i
24933965Sjdp
25033965Sjdp  ?=hash_apply     (c,y)          apply y() to every symbol in c.
25133965Sjdp  y                           evtries visited in 'unspecified' order.
25233965Sjdp
25333965Sjdp  v=hash_find      (c,s)          return value of s, or NULL if s not in c.
25433965Sjdp  ask()
25533965Sjdp  f
25633965Sjdp
25733965Sjdp  f,e=hash_ask()   (c,s,a)        return slot where s SHOULD live.            i
25833965Sjdp  code()                      maintain collision stats in c.              i
25933965Sjdp
26033965Sjdp  .=hash_code      (c,s)          compute hash-code for s,                    i
26133965Sjdp  from parameters of c.                       i
26233965Sjdp
26333965Sjdp  */
26433965Sjdp
26533965Sjdp/* Returned by hash_ask() to stop extra testing. hash_ask() wants to
26633965Sjdp   return both a slot and a status. This is the status.  TRUE: found
26733965Sjdp   symbol FALSE: absent: empty or deleted slot Also returned by
26833965Sjdp   hash_jam().  TRUE: we replaced a value FALSE: we inserted a value.  */
26933965Sjdpstatic char hash_found;
27033965Sjdp
27133965Sjdpstatic struct hash_entry *hash_ask PARAMS ((struct hash_control *,
27233965Sjdp					    const char *, int));
27333965Sjdpstatic int hash_code PARAMS ((struct hash_control *, const char *));
27433965Sjdpstatic const char *hash_grow PARAMS ((struct hash_control *));
27533965Sjdp
27633965Sjdp/* Create a new hash table.  Return NULL if failed; otherwise return handle
27733965Sjdp   (address of struct hash).  */
27833965Sjdpstruct hash_control *
27933965Sjdphash_new ()
28033965Sjdp{
28133965Sjdp  struct hash_control *retval;
28233965Sjdp  struct hash_entry *room;	/* points to hash table */
28333965Sjdp  struct hash_entry *wall;
28433965Sjdp  struct hash_entry *entry;
28533965Sjdp  int *ip;		/* scan stats block of struct hash_control */
28633965Sjdp  int *nd;		/* limit of stats block */
28733965Sjdp
28833965Sjdp  room = (struct hash_entry *) xmalloc (sizeof (struct hash_entry)
28933965Sjdp					/* +1 for the wall entry */
29033965Sjdp					* ((1 << START_POWER) + 1));
29133965Sjdp  retval = (struct hash_control *) xmalloc (sizeof (struct hash_control));
29233965Sjdp
29333965Sjdp  nd = retval->hash_stat + STATLENGTH;
29433965Sjdp  for (ip = retval->hash_stat; ip < nd; ip++)
29533965Sjdp    *ip = 0;
29633965Sjdp
29733965Sjdp  retval->hash_stat[STAT_SIZE] = 1 << START_POWER;
29833965Sjdp  retval->hash_mask = (1 << START_POWER) - 1;
29933965Sjdp  retval->hash_sizelog = START_POWER;
30033965Sjdp  /* works for 1's compl ok */
30133965Sjdp  retval->hash_where = room;
30233965Sjdp  retval->hash_wall =
30333965Sjdp    wall = room + (1 << START_POWER);
30433965Sjdp  retval->hash_full = FULL_VALUE (1 << START_POWER);
30533965Sjdp  for (entry = room; entry <= wall; entry++)
30633965Sjdp    entry->hash_string = NULL;
30733965Sjdp  return retval;
30833965Sjdp}
30933965Sjdp
31033965Sjdp/*
31133965Sjdp *           h a s h _ d i e ( )
31233965Sjdp *
31333965Sjdp * Table should be empty, but this is not checked.
31433965Sjdp * To empty the table, try hash_apply()ing a symbol deleter.
31533965Sjdp * Return to free memory both the hash table and it's control
31633965Sjdp * block.
31733965Sjdp * 'handle' has no meaning after this function.
31833965Sjdp * No errors are recoverable.
31933965Sjdp */
32033965Sjdpvoid
32133965Sjdphash_die (handle)
32233965Sjdp     struct hash_control *handle;
32333965Sjdp{
32433965Sjdp  free ((char *) handle->hash_where);
32533965Sjdp  free ((char *) handle);
32633965Sjdp}
32733965Sjdp
32833965Sjdp#ifdef TEST
32933965Sjdp/*
33033965Sjdp *           h a s h _ s a y ( )
33133965Sjdp *
33233965Sjdp * Return the size of the statistics table, and as many statistics as
33333965Sjdp * we can until either (a) we have run out of statistics or (b) caller
33433965Sjdp * has run out of buffer.
33533965Sjdp * NOTE: hash_say treats all statistics alike.
33633965Sjdp * These numbers may change with time, due to insertions, deletions
33733965Sjdp * and expansions of the table.
33833965Sjdp * The first "statistic" returned is the length of hash_stat[].
33933965Sjdp * Then contents of hash_stat[] are read out (in ascending order)
34033965Sjdp * until your buffer or hash_stat[] is exausted.
34133965Sjdp */
34233965Sjdpstatic void
34333965Sjdphash_say (handle, buffer, bufsiz)
34433965Sjdp     struct hash_control *handle;
34533965Sjdp     int buffer[ /*bufsiz*/ ];
34633965Sjdp     int bufsiz;
34733965Sjdp{
34833965Sjdp  int *nd;		/* limit of statistics block */
34933965Sjdp  int *ip;		/* scan statistics */
35033965Sjdp
35133965Sjdp  ip = handle->hash_stat;
35233965Sjdp  nd = ip + min (bufsiz - 1, STATLENGTH);
35333965Sjdp  if (bufsiz > 0)		/* trust nothing! bufsiz<=0 is dangerous */
35433965Sjdp    {
35533965Sjdp      *buffer++ = STATLENGTH;
35633965Sjdp      for (; ip < nd; ip++, buffer++)
35733965Sjdp	{
35833965Sjdp	  *buffer = *ip;
35933965Sjdp	}
36033965Sjdp    }
36133965Sjdp}
36233965Sjdp#endif
36333965Sjdp
36433965Sjdp/*
36533965Sjdp *           h a s h _ d e l e t e ( )
36633965Sjdp *
36733965Sjdp * Try to delete a symbol from the table.
36833965Sjdp * If it was there, return its value (and adjust STAT_USED).
36933965Sjdp * Otherwise, return NULL.
37033965Sjdp * Anyway, the symbol is not present after this function.
37133965Sjdp *
37233965Sjdp */
37333965SjdpPTR				/* NULL if string not in table, else */
37433965Sjdp/* returns value of deleted symbol */
37533965Sjdphash_delete (handle, string)
37633965Sjdp     struct hash_control *handle;
37733965Sjdp     const char *string;
37833965Sjdp{
37933965Sjdp  PTR retval;
38033965Sjdp  struct hash_entry *entry;
38133965Sjdp
38233965Sjdp  entry = hash_ask (handle, string, STAT__WRITE);
38333965Sjdp  if (hash_found)
38433965Sjdp    {
38533965Sjdp      retval = entry->hash_value;
38633965Sjdp      entry->hash_string = DELETED;
38733965Sjdp      handle->hash_stat[STAT_USED] -= 1;
38833965Sjdp#ifdef SUSPECT
38933965Sjdp      if (handle->hash_stat[STAT_USED] < 0)
39033965Sjdp	{
39133965Sjdp	  error ("hash_delete");
39233965Sjdp	}
39333965Sjdp#endif /* def SUSPECT */
39433965Sjdp    }
39533965Sjdp  else
39633965Sjdp    {
39733965Sjdp      retval = NULL;
39833965Sjdp    }
39933965Sjdp  return (retval);
40033965Sjdp}
40133965Sjdp
40233965Sjdp/*
40333965Sjdp *                   h a s h _ r e p l a c e ( )
40433965Sjdp *
40533965Sjdp * Try to replace the old value of a symbol with a new value.
40633965Sjdp * Normally return the old value.
40733965Sjdp * Return NULL and don't change the table if the symbol is not already
40833965Sjdp * in the table.
40933965Sjdp */
41033965SjdpPTR
41133965Sjdphash_replace (handle, string, value)
41233965Sjdp     struct hash_control *handle;
41333965Sjdp     const char *string;
41433965Sjdp     PTR value;
41533965Sjdp{
41633965Sjdp  struct hash_entry *entry;
41733965Sjdp  char *retval;
41833965Sjdp
41933965Sjdp  entry = hash_ask (handle, string, STAT__WRITE);
42033965Sjdp  if (hash_found)
42133965Sjdp    {
42233965Sjdp      retval = entry->hash_value;
42333965Sjdp      entry->hash_value = value;
42433965Sjdp    }
42533965Sjdp  else
42633965Sjdp    {
42733965Sjdp      retval = NULL;
42833965Sjdp    }
42933965Sjdp  ;
43033965Sjdp  return retval;
43133965Sjdp}
43233965Sjdp
43333965Sjdp/*
43433965Sjdp *                   h a s h _ i n s e r t ( )
43533965Sjdp *
43633965Sjdp * Insert a (symbol-string, value) into the hash table.
43733965Sjdp * Return an error string, 0 means OK.
43833965Sjdp * It is an 'error' to insert an existing symbol.
43933965Sjdp */
44033965Sjdp
44133965Sjdpconst char *			/* return error string */
44233965Sjdphash_insert (handle, string, value)
44333965Sjdp     struct hash_control *handle;
44433965Sjdp     const char *string;
44533965Sjdp     PTR value;
44633965Sjdp{
44733965Sjdp  struct hash_entry *entry;
44833965Sjdp  const char *retval;
44933965Sjdp
45033965Sjdp  retval = 0;
45133965Sjdp  if (handle->hash_stat[STAT_USED] > handle->hash_full)
45233965Sjdp    {
45333965Sjdp      retval = hash_grow (handle);
45433965Sjdp    }
45533965Sjdp  if (!retval)
45633965Sjdp    {
45733965Sjdp      entry = hash_ask (handle, string, STAT__WRITE);
45833965Sjdp      if (hash_found)
45933965Sjdp	{
46033965Sjdp	  retval = "exists";
46133965Sjdp	}
46233965Sjdp      else
46333965Sjdp	{
46433965Sjdp	  entry->hash_value = value;
46533965Sjdp	  entry->hash_string = string;
46633965Sjdp	  handle->hash_stat[STAT_USED] += 1;
46733965Sjdp	}
46833965Sjdp    }
46933965Sjdp  return retval;
47033965Sjdp}
47133965Sjdp
47233965Sjdp/*
47333965Sjdp *               h a s h _ j a m ( )
47433965Sjdp *
47533965Sjdp * Regardless of what was in the symbol table before, after hash_jam()
47633965Sjdp * the named symbol has the given value. The symbol is either inserted or
47733965Sjdp * (its value is) replaced.
47833965Sjdp * An error message string is returned, 0 means OK.
47933965Sjdp *
48033965Sjdp * WARNING: this may decide to grow the hashed symbol table.
48133965Sjdp * To do this, we call hash_grow(), WHICH WILL recursively CALL US.
48233965Sjdp *
48333965Sjdp * We report status internally: hash_found is TRUE if we replaced, but
48433965Sjdp * false if we inserted.
48533965Sjdp */
48633965Sjdpconst char *
48733965Sjdphash_jam (handle, string, value)
48833965Sjdp     struct hash_control *handle;
48933965Sjdp     const char *string;
49033965Sjdp     PTR value;
49133965Sjdp{
49233965Sjdp  const char *retval;
49333965Sjdp  struct hash_entry *entry;
49433965Sjdp
49533965Sjdp  retval = 0;
49633965Sjdp  if (handle->hash_stat[STAT_USED] > handle->hash_full)
49733965Sjdp    {
49833965Sjdp      retval = hash_grow (handle);
49933965Sjdp    }
50033965Sjdp  if (!retval)
50133965Sjdp    {
50233965Sjdp      entry = hash_ask (handle, string, STAT__WRITE);
50333965Sjdp      if (!hash_found)
50433965Sjdp	{
50533965Sjdp	  entry->hash_string = string;
50633965Sjdp	  handle->hash_stat[STAT_USED] += 1;
50733965Sjdp	}
50833965Sjdp      entry->hash_value = value;
50933965Sjdp    }
51033965Sjdp  return retval;
51133965Sjdp}
51233965Sjdp
51333965Sjdp/*
51433965Sjdp *               h a s h _ g r o w ( )
51533965Sjdp *
51633965Sjdp * Grow a new (bigger) hash table from the old one.
51733965Sjdp * We choose to double the hash table's size.
51833965Sjdp * Return a human-scrutible error string: 0 if OK.
51933965Sjdp * Warning! This uses hash_jam(), which had better not recurse
52033965Sjdp * back here! Hash_jam() conditionally calls us, but we ALWAYS
52133965Sjdp * call hash_jam()!
52233965Sjdp * Internal.
52333965Sjdp */
52433965Sjdpstatic const char *
52533965Sjdphash_grow (handle)		/* make a hash table grow */
52633965Sjdp     struct hash_control *handle;
52733965Sjdp{
52833965Sjdp  struct hash_entry *newwall;
52933965Sjdp  struct hash_entry *newwhere;
53033965Sjdp  struct hash_entry *newtrack;
53133965Sjdp  struct hash_entry *oldtrack;
53233965Sjdp  struct hash_entry *oldwhere;
53333965Sjdp  struct hash_entry *oldwall;
53433965Sjdp  int temp;
53533965Sjdp  int newsize;
53633965Sjdp  const char *string;
53733965Sjdp  const char *retval;
53833965Sjdp#ifdef SUSPECT
53933965Sjdp  int oldused;
54033965Sjdp#endif
54133965Sjdp
54233965Sjdp  /*
54333965Sjdp   * capture info about old hash table
54433965Sjdp   */
54533965Sjdp  oldwhere = handle->hash_where;
54633965Sjdp  oldwall = handle->hash_wall;
54733965Sjdp#ifdef SUSPECT
54833965Sjdp  oldused = handle->hash_stat[STAT_USED];
54933965Sjdp#endif
55033965Sjdp  /*
55133965Sjdp   * attempt to get enough room for a hash table twice as big
55233965Sjdp   */
55333965Sjdp  temp = handle->hash_stat[STAT_SIZE];
55433965Sjdp  newwhere = ((struct hash_entry *)
55533965Sjdp	      xmalloc ((unsigned long) ((temp << (GROW_FACTOR + 1))
55633965Sjdp					/* +1 for wall slot */
55733965Sjdp					* sizeof (struct hash_entry))));
55833965Sjdp  if (newwhere == NULL)
55933965Sjdp    return "no_room";
56033965Sjdp
56133965Sjdp  /*
56233965Sjdp   * have enough room: now we do all the work.
56333965Sjdp   * double the size of everything in handle.
56433965Sjdp   */
56533965Sjdp  handle->hash_mask = ((handle->hash_mask + 1) << GROW_FACTOR) - 1;
56633965Sjdp  handle->hash_stat[STAT_SIZE] <<= GROW_FACTOR;
56733965Sjdp  newsize = handle->hash_stat[STAT_SIZE];
56833965Sjdp  handle->hash_where = newwhere;
56933965Sjdp  handle->hash_full <<= GROW_FACTOR;
57033965Sjdp  handle->hash_sizelog += GROW_FACTOR;
57133965Sjdp  handle->hash_wall = newwall = newwhere + newsize;
57233965Sjdp  /* Set all those pesky new slots to vacant.  */
57333965Sjdp  for (newtrack = newwhere; newtrack <= newwall; newtrack++)
57433965Sjdp    newtrack->hash_string = NULL;
57533965Sjdp  /* We will do a scan of the old table, the hard way, using the
57633965Sjdp   * new control block to re-insert the data into new hash table.  */
57733965Sjdp  handle->hash_stat[STAT_USED] = 0;
57833965Sjdp  for (oldtrack = oldwhere; oldtrack < oldwall; oldtrack++)
57933965Sjdp    if (((string = oldtrack->hash_string) != NULL) && string != DELETED)
58033965Sjdp      if ((retval = hash_jam (handle, string, oldtrack->hash_value)))
58133965Sjdp	return retval;
58233965Sjdp
58333965Sjdp#ifdef SUSPECT
58433965Sjdp  if (handle->hash_stat[STAT_USED] != oldused)
58533965Sjdp    return "hash_used";
58633965Sjdp#endif
58733965Sjdp
58833965Sjdp  /* We have a completely faked up control block.
58933965Sjdp     Return the old hash table.  */
59033965Sjdp  free ((char *) oldwhere);
59133965Sjdp
59233965Sjdp  return 0;
59333965Sjdp}
59433965Sjdp
59533965Sjdp#ifdef TEST
59633965Sjdp/*
59733965Sjdp *          h a s h _ a p p l y ( )
59833965Sjdp *
59933965Sjdp * Use this to scan each entry in symbol table.
60033965Sjdp * For each symbol, this calls (applys) a nominated function supplying the
60133965Sjdp * symbol's value (and the symbol's name).
60233965Sjdp * The idea is you use this to destroy whatever is associted with
60333965Sjdp * any values in the table BEFORE you destroy the table with hash_die.
60433965Sjdp * Of course, you can use it for other jobs; whenever you need to
60533965Sjdp * visit all extant symbols in the table.
60633965Sjdp *
60733965Sjdp * We choose to have a call-you-back idea for two reasons:
60833965Sjdp *  asthetic: it is a neater idea to use apply than an explicit loop
60933965Sjdp *  sensible: if we ever had to grow the symbol table (due to insertions)
61033965Sjdp *            then we would lose our place in the table when we re-hashed
61133965Sjdp *            symbols into the new table in a different order.
61233965Sjdp *
61333965Sjdp * The order symbols are visited depends entirely on the hashing function.
61433965Sjdp * Whenever you insert a (symbol, value) you risk expanding the table. If
61533965Sjdp * you do expand the table, then the hashing function WILL change, so you
61633965Sjdp * MIGHT get a different order of symbols visited. In other words, if you
61733965Sjdp * want the same order of visiting symbols as the last time you used
61833965Sjdp * hash_apply() then you better not have done any hash_insert()s or
61933965Sjdp * hash_jam()s since the last time you used hash_apply().
62033965Sjdp *
62133965Sjdp * In future we may use the value returned by your nominated function.
62233965Sjdp * One idea is to abort the scan if, after applying the function to a
62333965Sjdp * certain node, the function returns a certain code.
62433965Sjdp *
62533965Sjdp * The function you supply should be of the form:
62633965Sjdp *      void myfunct(string,value)
62733965Sjdp *              char * string;        |* the symbol's name *|
62833965Sjdp *              char * value;         |* the symbol's value *|
62933965Sjdp *      {
63033965Sjdp *        |* ... *|
63133965Sjdp *      }
63233965Sjdp *
63333965Sjdp */
63433965Sjdpvoid
63533965Sjdphash_apply (handle, function)
63633965Sjdp     struct hash_control *handle;
63733965Sjdp     void (*function) ();
63833965Sjdp{
63933965Sjdp  struct hash_entry *entry;
64033965Sjdp  struct hash_entry *wall;
64133965Sjdp
64233965Sjdp  wall = handle->hash_wall;
64333965Sjdp  for (entry = handle->hash_where; entry < wall; entry++)
64433965Sjdp    {
64533965Sjdp      if (islive (entry))	/* silly code: tests entry->string twice! */
64633965Sjdp	{
64733965Sjdp	  (*function) (entry->hash_string, entry->hash_value);
64833965Sjdp	}
64933965Sjdp    }
65033965Sjdp}
65133965Sjdp#endif
65233965Sjdp
65333965Sjdp/*
65433965Sjdp *          h a s h _ f i n d ( )
65533965Sjdp *
65633965Sjdp * Given symbol string, find value (if any).
65733965Sjdp * Return found value or NULL.
65833965Sjdp */
65933965SjdpPTR
66033965Sjdphash_find (handle, string)
66133965Sjdp     struct hash_control *handle;
66233965Sjdp     const char *string;
66333965Sjdp{
66433965Sjdp  struct hash_entry *entry;
66533965Sjdp
66633965Sjdp  entry = hash_ask (handle, string, STAT__READ);
66733965Sjdp  if (hash_found)
66833965Sjdp    return entry->hash_value;
66933965Sjdp  else
67033965Sjdp    return NULL;
67133965Sjdp}
67233965Sjdp
67333965Sjdp/*
67433965Sjdp *          h a s h _ a s k ( )
67533965Sjdp *
67633965Sjdp * Searches for given symbol string.
67733965Sjdp * Return the slot where it OUGHT to live. It may be there.
67833965Sjdp * Return hash_found: TRUE only if symbol is in that slot.
67933965Sjdp * Access argument is to help keep statistics in control block.
68033965Sjdp * Internal.
68133965Sjdp */
68233965Sjdpstatic struct hash_entry *	/* string slot, may be empty or deleted */
68333965Sjdphash_ask (handle, string, access_type)
68433965Sjdp     struct hash_control *handle;
68533965Sjdp     const char *string;
68633965Sjdp     int access_type;
68733965Sjdp{
68833965Sjdp  const char *s;
68933965Sjdp  struct hash_entry *slot;
69033965Sjdp  int collision;	/* count collisions */
69133965Sjdp  int strcmps;
69233965Sjdp  int hcode;
69333965Sjdp
69433965Sjdp  /* start looking here */
69533965Sjdp  hcode = hash_code (handle, string);
69633965Sjdp  slot = handle->hash_where + (hcode & handle->hash_mask);
69733965Sjdp
69833965Sjdp  handle->hash_stat[STAT_ACCESS + access_type] += 1;
69933965Sjdp  collision = strcmps = 0;
70033965Sjdp  hash_found = FALSE;
70133965Sjdp  while (((s = slot->hash_string) != NULL) && s != DELETED)
70233965Sjdp    {
70333965Sjdp      if (string == s)
70433965Sjdp	{
70533965Sjdp	  hash_found = TRUE;
70633965Sjdp	  break;
70733965Sjdp	}
70833965Sjdp      if (slot->h == hcode)
70933965Sjdp	{
71033965Sjdp	  if (!strcmp (string, s))
71133965Sjdp	    {
71233965Sjdp	      hash_found = TRUE;
71333965Sjdp	      break;
71433965Sjdp	    }
71533965Sjdp	  strcmps++;
71633965Sjdp	}
71733965Sjdp      collision++;
71833965Sjdp      slot++;
71933965Sjdp    }
72033965Sjdp  /*
72133965Sjdp   * slot:                                                      return:
72233965Sjdp   *       in use:     we found string                           slot
72333965Sjdp   *       at empty:
72433965Sjdp   *                   at wall:        we fell off: wrap round   ????
72533965Sjdp   *                   in table:       dig here                  slot
72633965Sjdp   *       at DELETED: dig here                                  slot
72733965Sjdp   */
72833965Sjdp  if (slot == handle->hash_wall)
72933965Sjdp    {
73033965Sjdp      slot = handle->hash_where;/* now look again */
73133965Sjdp      while (((s = slot->hash_string) != NULL) && s != DELETED)
73233965Sjdp	{
73333965Sjdp	  if (string == s)
73433965Sjdp	    {
73533965Sjdp	      hash_found = TRUE;
73633965Sjdp	      break;
73733965Sjdp	    }
73833965Sjdp	  if (slot->h == hcode)
73933965Sjdp	    {
74033965Sjdp	      if (!strcmp (string, s))
74133965Sjdp		{
74233965Sjdp		  hash_found = TRUE;
74333965Sjdp		  break;
74433965Sjdp		}
74533965Sjdp	      strcmps++;
74633965Sjdp	    }
74733965Sjdp	  collision++;
74833965Sjdp	  slot++;
74933965Sjdp	}
75033965Sjdp      /*
75133965Sjdp       * slot:                                                   return:
75233965Sjdp       *       in use: we found it                                slot
75333965Sjdp       *       empty:  wall:         ERROR IMPOSSIBLE             !!!!
75433965Sjdp       *               in table:     dig here                     slot
75533965Sjdp       *       DELETED:dig here                                   slot
75633965Sjdp       */
75733965Sjdp    }
75833965Sjdp  handle->hash_stat[STAT_COLLIDE + access_type] += collision;
75933965Sjdp  handle->hash_stat[STAT_STRCMP + access_type] += strcmps;
76033965Sjdp  if (!hash_found)
76133965Sjdp    slot->h = hcode;
76233965Sjdp  return slot;			/* also return hash_found */
76333965Sjdp}
76433965Sjdp
76533965Sjdp/*
76633965Sjdp *           h a s h _ c o d e
76733965Sjdp *
76833965Sjdp * Does hashing of symbol string to hash number.
76933965Sjdp * Internal.
77033965Sjdp */
77133965Sjdpstatic int
77233965Sjdphash_code (handle, string)
77333965Sjdp     struct hash_control *handle;
77433965Sjdp     const char *string;
77533965Sjdp{
77633965Sjdp#if 1 /* There seems to be some interesting property of this function
77733965Sjdp	 that prevents the bfd version below from being an adequate
77833965Sjdp	 substitute.  @@ Figure out what this property is!  */
77933965Sjdp  long h;		/* hash code built here */
78033965Sjdp  long c;		/* each character lands here */
78133965Sjdp  int n;		/* Amount to shift h by */
78233965Sjdp
78333965Sjdp  n = (handle->hash_sizelog - 3);
78433965Sjdp  h = 0;
78533965Sjdp  while ((c = *string++) != 0)
78633965Sjdp    {
78733965Sjdp      h += c;
78833965Sjdp      h = (h << 3) + (h >> n) + c;
78933965Sjdp    }
79033965Sjdp  return h;
79133965Sjdp#else
79233965Sjdp  /* from bfd */
79333965Sjdp  unsigned long h = 0;
79433965Sjdp  unsigned int len = 0;
79533965Sjdp  unsigned int c;
79633965Sjdp
79733965Sjdp  while ((c = *string++) != 0)
79833965Sjdp    {
79933965Sjdp      h += c + (c << 17);
80033965Sjdp      h ^= h >> 2;
80133965Sjdp      ++len;
80233965Sjdp    }
80333965Sjdp  h += len + (len << 17);
80433965Sjdp  h ^= h >> 2;
80533965Sjdp  return h;
80633965Sjdp#endif
80733965Sjdp}
80833965Sjdp
80933965Sjdpvoid
81033965Sjdphash_print_statistics (file, name, h)
81133965Sjdp     FILE *file;
81233965Sjdp     const char *name;
81333965Sjdp     struct hash_control *h;
81433965Sjdp{
81533965Sjdp  unsigned long sz, used, pct;
81633965Sjdp
81733965Sjdp  if (h == 0)
81833965Sjdp    return;
81933965Sjdp
82033965Sjdp  sz = h->hash_stat[STAT_SIZE];
82133965Sjdp  used = h->hash_stat[STAT_USED];
82233965Sjdp  pct = (used * 100 + sz / 2) / sz;
82333965Sjdp
82433965Sjdp  fprintf (file, "%s hash statistics:\n\t%lu/%lu slots used (%lu%%)\n",
82533965Sjdp	   name, used, sz, pct);
82633965Sjdp
82733965Sjdp#define P(name, off)							\
82833965Sjdp  fprintf (file, "\t%-16s %6dr + %6dw = %7d\n", name,			\
82933965Sjdp	   h->hash_stat[off+STAT__READ],				\
83033965Sjdp	   h->hash_stat[off+STAT__WRITE],				\
83133965Sjdp	   h->hash_stat[off+STAT__READ] + h->hash_stat[off+STAT__WRITE])
83233965Sjdp
83333965Sjdp  P ("accesses:", STAT_ACCESS);
83433965Sjdp  P ("collisions:", STAT_COLLIDE);
83533965Sjdp  P ("string compares:", STAT_STRCMP);
83633965Sjdp
83733965Sjdp#undef P
83833965Sjdp}
83933965Sjdp
84033965Sjdp/*
84133965Sjdp * Here is a test program to exercise above.
84233965Sjdp */
84333965Sjdp#ifdef TEST
84433965Sjdp
84533965Sjdp#define TABLES (6)		/* number of hash tables to maintain */
84633965Sjdp/* (at once) in any testing */
84733965Sjdp#define STATBUFSIZE (12)	/* we can have 12 statistics */
84833965Sjdp
84933965Sjdpint statbuf[STATBUFSIZE];	/* display statistics here */
85033965Sjdpchar answer[100];		/* human farts here */
85133965Sjdpchar *hashtable[TABLES];	/* we test many hash tables at once */
85233965Sjdpchar *h;			/* points to curent hash_control */
85333965Sjdpchar **pp;
85433965Sjdpchar *p;
85533965Sjdpchar *name;
85633965Sjdpchar *value;
85733965Sjdpint size;
85833965Sjdpint used;
85933965Sjdpchar command;
86033965Sjdpint number;			/* number 0:TABLES-1 of current hashed */
86133965Sjdp/* symbol table */
86233965Sjdp
86333965Sjdpmain ()
86433965Sjdp{
86533965Sjdp  void applicatee ();
86633965Sjdp  void destroy ();
86733965Sjdp  char *what ();
86833965Sjdp  int *ip;
86933965Sjdp
87033965Sjdp  number = 0;
87133965Sjdp  h = 0;
87233965Sjdp  printf ("type h <RETURN> for help\n");
87333965Sjdp  for (;;)
87433965Sjdp    {
87533965Sjdp      printf ("hash_test command: ");
87633965Sjdp      gets (answer);
87733965Sjdp      command = answer[0];
87833965Sjdp      if (isupper (command))
87933965Sjdp	command = tolower (command);	/* ecch! */
88033965Sjdp      switch (command)
88133965Sjdp	{
88233965Sjdp	case '#':
88333965Sjdp	  printf ("old hash table #=%d.\n", number);
88433965Sjdp	  whattable ();
88533965Sjdp	  break;
88633965Sjdp	case '?':
88733965Sjdp	  for (pp = hashtable; pp < hashtable + TABLES; pp++)
88833965Sjdp	    {
88933965Sjdp	      printf ("address of hash table #%d control block is %xx\n"
89033965Sjdp		      ,pp - hashtable, *pp);
89133965Sjdp	    }
89233965Sjdp	  break;
89333965Sjdp	case 'a':
89433965Sjdp	  hash_apply (h, applicatee);
89533965Sjdp	  break;
89633965Sjdp	case 'd':
89733965Sjdp	  hash_apply (h, destroy);
89833965Sjdp	  hash_die (h);
89933965Sjdp	  break;
90033965Sjdp	case 'f':
90133965Sjdp	  p = hash_find (h, name = what ("symbol"));
90233965Sjdp	  printf ("value of \"%s\" is \"%s\"\n", name, p ? p : "NOT-PRESENT");
90333965Sjdp	  break;
90433965Sjdp	case 'h':
90533965Sjdp	  printf ("# show old, select new default hash table number\n");
90633965Sjdp	  printf ("? display all hashtable control block addresses\n");
90733965Sjdp	  printf ("a apply a simple display-er to each symbol in table\n");
90833965Sjdp	  printf ("d die: destroy hashtable\n");
90933965Sjdp	  printf ("f find value of nominated symbol\n");
91033965Sjdp	  printf ("h this help\n");
91133965Sjdp	  printf ("i insert value into symbol\n");
91233965Sjdp	  printf ("j jam value into symbol\n");
91333965Sjdp	  printf ("n new hashtable\n");
91433965Sjdp	  printf ("r replace a value with another\n");
91533965Sjdp	  printf ("s say what %% of table is used\n");
91633965Sjdp	  printf ("q exit this program\n");
91733965Sjdp	  printf ("x delete a symbol from table, report its value\n");
91833965Sjdp	  break;
91933965Sjdp	case 'i':
92033965Sjdp	  p = hash_insert (h, name = what ("symbol"), value = what ("value"));
92133965Sjdp	  if (p)
92233965Sjdp	    {
92333965Sjdp	      printf ("symbol=\"%s\"  value=\"%s\"  error=%s\n", name, value,
92433965Sjdp		      p);
92533965Sjdp	    }
92633965Sjdp	  break;
92733965Sjdp	case 'j':
92833965Sjdp	  p = hash_jam (h, name = what ("symbol"), value = what ("value"));
92933965Sjdp	  if (p)
93033965Sjdp	    {
93133965Sjdp	      printf ("symbol=\"%s\"  value=\"%s\"  error=%s\n", name, value, p);
93233965Sjdp	    }
93333965Sjdp	  break;
93433965Sjdp	case 'n':
93533965Sjdp	  h = hashtable[number] = (char *) hash_new ();
93633965Sjdp	  break;
93733965Sjdp	case 'q':
93833965Sjdp	  exit (EXIT_SUCCESS);
93933965Sjdp	case 'r':
94033965Sjdp	  p = hash_replace (h, name = what ("symbol"), value = what ("value"));
94133965Sjdp	  printf ("old value was \"%s\"\n", p ? p : "{}");
94233965Sjdp	  break;
94333965Sjdp	case 's':
94433965Sjdp	  hash_say (h, statbuf, STATBUFSIZE);
94533965Sjdp	  for (ip = statbuf; ip < statbuf + STATBUFSIZE; ip++)
94633965Sjdp	    {
94733965Sjdp	      printf ("%d ", *ip);
94833965Sjdp	    }
94933965Sjdp	  printf ("\n");
95033965Sjdp	  break;
95133965Sjdp	case 'x':
95233965Sjdp	  p = hash_delete (h, name = what ("symbol"));
95333965Sjdp	  printf ("old value was \"%s\"\n", p ? p : "{}");
95433965Sjdp	  break;
95533965Sjdp	default:
95633965Sjdp	  printf ("I can't understand command \"%c\"\n", command);
95733965Sjdp	  break;
95833965Sjdp	}
95933965Sjdp    }
96033965Sjdp}
96133965Sjdp
96233965Sjdpchar *
96333965Sjdpwhat (description)
96433965Sjdp     char *description;
96533965Sjdp{
96633965Sjdp  char *retval;
96733965Sjdp  char *malloc ();
96833965Sjdp
96933965Sjdp  printf ("   %s : ", description);
97033965Sjdp  gets (answer);
97133965Sjdp  /* will one day clean up answer here */
97233965Sjdp  retval = malloc (strlen (answer) + 1);
97333965Sjdp  if (!retval)
97433965Sjdp    {
97533965Sjdp      error ("room");
97633965Sjdp    }
97733965Sjdp  (void) strcpy (retval, answer);
97833965Sjdp  return (retval);
97933965Sjdp}
98033965Sjdp
98133965Sjdpvoid
98233965Sjdpdestroy (string, value)
98333965Sjdp     char *string;
98433965Sjdp     char *value;
98533965Sjdp{
98633965Sjdp  free (string);
98733965Sjdp  free (value);
98833965Sjdp}
98933965Sjdp
99033965Sjdp
99133965Sjdpvoid
99233965Sjdpapplicatee (string, value)
99333965Sjdp     char *string;
99433965Sjdp     char *value;
99533965Sjdp{
99633965Sjdp  printf ("%.20s-%.20s\n", string, value);
99733965Sjdp}
99833965Sjdp
99933965Sjdpwhattable ()			/* determine number: what hash table to use */
100033965Sjdp     /* also determine h: points to hash_control */
100133965Sjdp{
100233965Sjdp
100333965Sjdp  for (;;)
100433965Sjdp    {
100533965Sjdp      printf ("   what hash table (%d:%d) ?  ", 0, TABLES - 1);
100633965Sjdp      gets (answer);
100733965Sjdp      sscanf (answer, "%d", &number);
100833965Sjdp      if (number >= 0 && number < TABLES)
100933965Sjdp	{
101033965Sjdp	  h = hashtable[number];
101133965Sjdp	  if (!h)
101233965Sjdp	    {
101333965Sjdp	      printf ("warning: current hash-table-#%d. has no hash-control\n", number);
101433965Sjdp	    }
101533965Sjdp	  return;
101633965Sjdp	}
101733965Sjdp      else
101833965Sjdp	{
101933965Sjdp	  printf ("invalid hash table number: %d\n", number);
102033965Sjdp	}
102133965Sjdp    }
102233965Sjdp}
102333965Sjdp
102433965Sjdp
102533965Sjdp
102633965Sjdp#endif /* #ifdef TEST */
102733965Sjdp
102833965Sjdp/* end of hash.c */
1029