hash.c revision 33965
133965Sjdp/* hash.c - hash table lookup strings - 233965Sjdp Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 1996 333965Sjdp Free Software Foundation, Inc. 433965Sjdp 533965Sjdp This file is part of GAS, the GNU Assembler. 633965Sjdp 733965Sjdp GAS is free software; you can redistribute it and/or modify 833965Sjdp it under the terms of the GNU General Public License as published by 933965Sjdp the Free Software Foundation; either version 2, or (at your option) 1033965Sjdp any later version. 1133965Sjdp 1233965Sjdp GAS is distributed in the hope that it will be useful, 1333965Sjdp but WITHOUT ANY WARRANTY; without even the implied warranty of 1433965Sjdp MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1533965Sjdp GNU General Public License for more details. 1633965Sjdp 1733965Sjdp You should have received a copy of the GNU General Public License 1833965Sjdp along with GAS; see the file COPYING. If not, write to 1933965Sjdp the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 2033965Sjdp 2133965Sjdp/* 2233965Sjdp * BUGS, GRIPES, APOLOGIA etc. 2333965Sjdp * 2433965Sjdp * A typical user doesn't need ALL this: I intend to make a library out 2533965Sjdp * of it one day - Dean Elsner. 2633965Sjdp * Also, I want to change the definition of a symbol to (address,length) 2733965Sjdp * so I can put arbitrary binary in the names stored. [see hsh.c for that] 2833965Sjdp * 2933965Sjdp * This slime is common coupled inside the module. Com-coupling (and other 3033965Sjdp * vandalism) was done to speed running time. The interfaces at the 3133965Sjdp * module's edges are adequately clean. 3233965Sjdp * 3333965Sjdp * There is no way to (a) run a test script through this heap and (b) 3433965Sjdp * compare results with previous scripts, to see if we have broken any 3533965Sjdp * code. Use GNU (f)utilities to do this. A few commands assist test. 3633965Sjdp * The testing is awkward: it tries to be both batch & interactive. 3733965Sjdp * For now, interactive rules! 3833965Sjdp */ 3933965Sjdp 4033965Sjdp/* 4133965Sjdp * The idea is to implement a symbol table. A test jig is here. 4233965Sjdp * Symbols are arbitrary strings; they can't contain '\0'. 4333965Sjdp * [See hsh.c for a more general symbol flavour.] 4433965Sjdp * Each symbol is associated with a char*, which can point to anything 4533965Sjdp * you want, allowing an arbitrary property list for each symbol. 4633965Sjdp * 4733965Sjdp * The basic operations are: 4833965Sjdp * 4933965Sjdp * new creates symbol table, returns handle 5033965Sjdp * find (symbol) returns char* 5133965Sjdp * insert (symbol,char*) error if symbol already in table 5233965Sjdp * delete (symbol) returns char* if symbol was in table 5333965Sjdp * apply so you can delete all symbols before die() 5433965Sjdp * die destroy symbol table (free up memory) 5533965Sjdp * 5633965Sjdp * Supplementary functions include: 5733965Sjdp * 5833965Sjdp * say how big? what % full? 5933965Sjdp * replace (symbol,newval) report previous value 6033965Sjdp * jam (symbol,value) assert symbol:=value 6133965Sjdp * 6233965Sjdp * You, the caller, have control over errors: this just reports them. 6333965Sjdp * 6433965Sjdp * This package requires malloc(), free(). 6533965Sjdp * Malloc(size) returns NULL or address of char[size]. 6633965Sjdp * Free(address) frees same. 6733965Sjdp */ 6833965Sjdp 6933965Sjdp/* 7033965Sjdp * The code and its structures are re-enterent. 7133965Sjdp * 7233965Sjdp * Before you do anything else, you must call hash_new() which will 7333965Sjdp * return the address of a hash-table-control-block. You then use 7433965Sjdp * this address as a handle of the symbol table by passing it to all 7533965Sjdp * the other hash_...() functions. The only approved way to recover 7633965Sjdp * the memory used by the symbol table is to call hash_die() with the 7733965Sjdp * handle of the symbol table. 7833965Sjdp * 7933965Sjdp * Before you call hash_die() you normally delete anything pointed to 8033965Sjdp * by individual symbols. After hash_die() you can't use that symbol 8133965Sjdp * table again. 8233965Sjdp * 8333965Sjdp * The char* you associate with a symbol may not be NULL (0) because 8433965Sjdp * NULL is returned whenever a symbol is not in the table. Any other 8533965Sjdp * value is OK, except DELETED, #defined below. 8633965Sjdp * 8733965Sjdp * When you supply a symbol string for insertion, YOU MUST PRESERVE THE 8833965Sjdp * STRING until that symbol is deleted from the table. The reason is that 8933965Sjdp * only the address you supply, NOT the symbol string itself, is stored 9033965Sjdp * in the symbol table. 9133965Sjdp * 9233965Sjdp * You may delete and add symbols arbitrarily. 9333965Sjdp * Any or all symbols may have the same 'value' (char *). In fact, these 9433965Sjdp * routines don't do anything with your symbol values. 9533965Sjdp * 9633965Sjdp * You have no right to know where the symbol:char* mapping is stored, 9733965Sjdp * because it moves around in memory; also because we may change how it 9833965Sjdp * works and we don't want to break your code do we? However the handle 9933965Sjdp * (address of struct hash_control) is never changed in 10033965Sjdp * the life of the symbol table. 10133965Sjdp * 10233965Sjdp * What you CAN find out about a symbol table is: 10333965Sjdp * how many slots are in the hash table? 10433965Sjdp * how many slots are filled with symbols? 10533965Sjdp * (total hashes,collisions) for (reads,writes) (*) 10633965Sjdp * All of the above values vary in time. 10733965Sjdp * (*) some of these numbers will not be meaningful if we change the 10833965Sjdp * internals. */ 10933965Sjdp 11033965Sjdp/* 11133965Sjdp * I N T E R N A L 11233965Sjdp * 11333965Sjdp * Hash table is an array of hash_entries; each entry is a pointer to a 11433965Sjdp * a string and a user-supplied value 1 char* wide. 11533965Sjdp * 11633965Sjdp * The array always has 2 ** n elements, n>0, n integer. 11733965Sjdp * There is also a 'wall' entry after the array, which is always empty 11833965Sjdp * and acts as a sentinel to stop running off the end of the array. 11933965Sjdp * When the array gets too full, we create a new array twice as large 12033965Sjdp * and re-hash the symbols into the new array, then forget the old array. 12133965Sjdp * (Of course, we copy the values into the new array before we junk the 12233965Sjdp * old array!) 12333965Sjdp * 12433965Sjdp */ 12533965Sjdp 12633965Sjdp#include <stdio.h> 12733965Sjdp 12833965Sjdp#ifndef FALSE 12933965Sjdp#define FALSE (0) 13033965Sjdp#define TRUE (!FALSE) 13133965Sjdp#endif /* no FALSE yet */ 13233965Sjdp 13333965Sjdp#include <ctype.h> 13433965Sjdp#define min(a, b) ((a) < (b) ? (a) : (b)) 13533965Sjdp 13633965Sjdp#include "as.h" 13733965Sjdp 13833965Sjdp#define error as_fatal 13933965Sjdp 14033965Sjdpstatic char _deleted_[1]; 14133965Sjdp#define DELETED ((PTR)_deleted_) /* guarenteed unique address */ 14233965Sjdp#define START_POWER (10) /* power of two: size of new hash table */ 14333965Sjdp 14433965Sjdp/* TRUE if a symbol is in entry @ ptr. */ 14533965Sjdp#define islive(ptr) (ptr->hash_string && ptr->hash_string!=DELETED) 14633965Sjdp 14733965Sjdpenum stat_enum { 14833965Sjdp /* Number of slots in hash table. The wall does not count here. 14933965Sjdp We expect this is always a power of 2. */ 15033965Sjdp STAT_SIZE = 0, 15133965Sjdp /* Number of hash_ask calls. */ 15233965Sjdp STAT_ACCESS, 15333965Sjdp STAT_ACCESS_w, 15433965Sjdp /* Number of collisions (total). This may exceed STAT_ACCESS if we 15533965Sjdp have lots of collisions/access. */ 15633965Sjdp STAT_COLLIDE, 15733965Sjdp STAT_COLLIDE_w, 15833965Sjdp /* Slots used right now. */ 15933965Sjdp STAT_USED, 16033965Sjdp /* How many string compares? */ 16133965Sjdp STAT_STRCMP, 16233965Sjdp STAT_STRCMP_w, 16333965Sjdp /* Size of statistics block... this must be last. */ 16433965Sjdp STATLENGTH 16533965Sjdp}; 16633965Sjdp#define STAT__READ (0) /* reading */ 16733965Sjdp#define STAT__WRITE (1) /* writing */ 16833965Sjdp 16933965Sjdp/* When we grow a hash table, by what power of two do we increase it? */ 17033965Sjdp#define GROW_FACTOR 1 17133965Sjdp/* When should we grow it? */ 17233965Sjdp#define FULL_VALUE(N) ((N) / 2) 17333965Sjdp 17433965Sjdp/* #define SUSPECT to do runtime checks */ 17533965Sjdp/* #define TEST to be a test jig for hash...() */ 17633965Sjdp 17733965Sjdp#ifdef TEST 17833965Sjdp/* TEST: use smaller hash table */ 17933965Sjdp#undef START_POWER 18033965Sjdp#define START_POWER (3) 18133965Sjdp#undef START_SIZE 18233965Sjdp#define START_SIZE (8) 18333965Sjdp#undef START_FULL 18433965Sjdp#define START_FULL (4) 18533965Sjdp#endif 18633965Sjdp 18733965Sjdpstruct hash_entry 18833965Sjdp{ 18933965Sjdp const char *hash_string; /* points to where the symbol string is */ 19033965Sjdp /* NULL means slot is not used */ 19133965Sjdp /* DELETED means slot was deleted */ 19233965Sjdp PTR hash_value; /* user's datum, associated with symbol */ 19333965Sjdp unsigned long h; 19433965Sjdp}; 19533965Sjdp 19633965Sjdpstruct hash_control { 19733965Sjdp struct hash_entry *hash_where;/* address of hash table */ 19833965Sjdp int hash_sizelog; /* Log of ( hash_mask + 1 ) */ 19933965Sjdp int hash_mask; /* masks a hash into index into table */ 20033965Sjdp int hash_full; /* when hash_stat[STAT_USED] exceeds this, */ 20133965Sjdp /* grow table */ 20233965Sjdp struct hash_entry *hash_wall; /* point just after last (usable) entry */ 20333965Sjdp /* here we have some statistics */ 20433965Sjdp int hash_stat[STATLENGTH]; /* lies & statistics */ 20533965Sjdp}; 20633965Sjdp 20733965Sjdp/*------------------ plan ---------------------------------- i = internal 20833965Sjdp 20933965Sjdp struct hash_control * c; 21033965Sjdp struct hash_entry * e; i 21133965Sjdp int b[z]; buffer for statistics 21233965Sjdp z size of b 21333965Sjdp char * s; symbol string (address) [ key ] 21433965Sjdp char * v; value string (address) [datum] 21533965Sjdp boolean f; TRUE if we found s in hash table i 21633965Sjdp char * t; error string; 0 means OK 21733965Sjdp int a; access type [0...n) i 21833965Sjdp 21933965Sjdp c=hash_new () create new hash_control 22033965Sjdp 22133965Sjdp hash_die (c) destroy hash_control (and hash table) 22233965Sjdp table should be empty. 22333965Sjdp doesn't check if table is empty. 22433965Sjdp c has no meaning after this. 22533965Sjdp 22633965Sjdp hash_say (c,b,z) report statistics of hash_control. 22733965Sjdp also report number of available statistics. 22833965Sjdp 22933965Sjdp v=hash_delete (c,s) delete symbol, return old value if any. 23033965Sjdp ask() NULL means no old value. 23133965Sjdp f 23233965Sjdp 23333965Sjdp v=hash_replace (c,s,v) replace old value of s with v. 23433965Sjdp ask() NULL means no old value: no table change. 23533965Sjdp f 23633965Sjdp 23733965Sjdp t=hash_insert (c,s,v) insert (s,v) in c. 23833965Sjdp ask() return error string. 23933965Sjdp f it is an error to insert if s is already 24033965Sjdp in table. 24133965Sjdp if any error, c is unchanged. 24233965Sjdp 24333965Sjdp t=hash_jam (c,s,v) assert that new value of s will be v. i 24433965Sjdp ask() it may decide to GROW the table. i 24533965Sjdp f i 24633965Sjdp grow() i 24733965Sjdp t=hash_grow (c) grow the hash table. i 24833965Sjdp jam() will invoke JAM. i 24933965Sjdp 25033965Sjdp ?=hash_apply (c,y) apply y() to every symbol in c. 25133965Sjdp y evtries visited in 'unspecified' order. 25233965Sjdp 25333965Sjdp v=hash_find (c,s) return value of s, or NULL if s not in c. 25433965Sjdp ask() 25533965Sjdp f 25633965Sjdp 25733965Sjdp f,e=hash_ask() (c,s,a) return slot where s SHOULD live. i 25833965Sjdp code() maintain collision stats in c. i 25933965Sjdp 26033965Sjdp .=hash_code (c,s) compute hash-code for s, i 26133965Sjdp from parameters of c. i 26233965Sjdp 26333965Sjdp */ 26433965Sjdp 26533965Sjdp/* Returned by hash_ask() to stop extra testing. hash_ask() wants to 26633965Sjdp return both a slot and a status. This is the status. TRUE: found 26733965Sjdp symbol FALSE: absent: empty or deleted slot Also returned by 26833965Sjdp hash_jam(). TRUE: we replaced a value FALSE: we inserted a value. */ 26933965Sjdpstatic char hash_found; 27033965Sjdp 27133965Sjdpstatic struct hash_entry *hash_ask PARAMS ((struct hash_control *, 27233965Sjdp const char *, int)); 27333965Sjdpstatic int hash_code PARAMS ((struct hash_control *, const char *)); 27433965Sjdpstatic const char *hash_grow PARAMS ((struct hash_control *)); 27533965Sjdp 27633965Sjdp/* Create a new hash table. Return NULL if failed; otherwise return handle 27733965Sjdp (address of struct hash). */ 27833965Sjdpstruct hash_control * 27933965Sjdphash_new () 28033965Sjdp{ 28133965Sjdp struct hash_control *retval; 28233965Sjdp struct hash_entry *room; /* points to hash table */ 28333965Sjdp struct hash_entry *wall; 28433965Sjdp struct hash_entry *entry; 28533965Sjdp int *ip; /* scan stats block of struct hash_control */ 28633965Sjdp int *nd; /* limit of stats block */ 28733965Sjdp 28833965Sjdp room = (struct hash_entry *) xmalloc (sizeof (struct hash_entry) 28933965Sjdp /* +1 for the wall entry */ 29033965Sjdp * ((1 << START_POWER) + 1)); 29133965Sjdp retval = (struct hash_control *) xmalloc (sizeof (struct hash_control)); 29233965Sjdp 29333965Sjdp nd = retval->hash_stat + STATLENGTH; 29433965Sjdp for (ip = retval->hash_stat; ip < nd; ip++) 29533965Sjdp *ip = 0; 29633965Sjdp 29733965Sjdp retval->hash_stat[STAT_SIZE] = 1 << START_POWER; 29833965Sjdp retval->hash_mask = (1 << START_POWER) - 1; 29933965Sjdp retval->hash_sizelog = START_POWER; 30033965Sjdp /* works for 1's compl ok */ 30133965Sjdp retval->hash_where = room; 30233965Sjdp retval->hash_wall = 30333965Sjdp wall = room + (1 << START_POWER); 30433965Sjdp retval->hash_full = FULL_VALUE (1 << START_POWER); 30533965Sjdp for (entry = room; entry <= wall; entry++) 30633965Sjdp entry->hash_string = NULL; 30733965Sjdp return retval; 30833965Sjdp} 30933965Sjdp 31033965Sjdp/* 31133965Sjdp * h a s h _ d i e ( ) 31233965Sjdp * 31333965Sjdp * Table should be empty, but this is not checked. 31433965Sjdp * To empty the table, try hash_apply()ing a symbol deleter. 31533965Sjdp * Return to free memory both the hash table and it's control 31633965Sjdp * block. 31733965Sjdp * 'handle' has no meaning after this function. 31833965Sjdp * No errors are recoverable. 31933965Sjdp */ 32033965Sjdpvoid 32133965Sjdphash_die (handle) 32233965Sjdp struct hash_control *handle; 32333965Sjdp{ 32433965Sjdp free ((char *) handle->hash_where); 32533965Sjdp free ((char *) handle); 32633965Sjdp} 32733965Sjdp 32833965Sjdp#ifdef TEST 32933965Sjdp/* 33033965Sjdp * h a s h _ s a y ( ) 33133965Sjdp * 33233965Sjdp * Return the size of the statistics table, and as many statistics as 33333965Sjdp * we can until either (a) we have run out of statistics or (b) caller 33433965Sjdp * has run out of buffer. 33533965Sjdp * NOTE: hash_say treats all statistics alike. 33633965Sjdp * These numbers may change with time, due to insertions, deletions 33733965Sjdp * and expansions of the table. 33833965Sjdp * The first "statistic" returned is the length of hash_stat[]. 33933965Sjdp * Then contents of hash_stat[] are read out (in ascending order) 34033965Sjdp * until your buffer or hash_stat[] is exausted. 34133965Sjdp */ 34233965Sjdpstatic void 34333965Sjdphash_say (handle, buffer, bufsiz) 34433965Sjdp struct hash_control *handle; 34533965Sjdp int buffer[ /*bufsiz*/ ]; 34633965Sjdp int bufsiz; 34733965Sjdp{ 34833965Sjdp int *nd; /* limit of statistics block */ 34933965Sjdp int *ip; /* scan statistics */ 35033965Sjdp 35133965Sjdp ip = handle->hash_stat; 35233965Sjdp nd = ip + min (bufsiz - 1, STATLENGTH); 35333965Sjdp if (bufsiz > 0) /* trust nothing! bufsiz<=0 is dangerous */ 35433965Sjdp { 35533965Sjdp *buffer++ = STATLENGTH; 35633965Sjdp for (; ip < nd; ip++, buffer++) 35733965Sjdp { 35833965Sjdp *buffer = *ip; 35933965Sjdp } 36033965Sjdp } 36133965Sjdp} 36233965Sjdp#endif 36333965Sjdp 36433965Sjdp/* 36533965Sjdp * h a s h _ d e l e t e ( ) 36633965Sjdp * 36733965Sjdp * Try to delete a symbol from the table. 36833965Sjdp * If it was there, return its value (and adjust STAT_USED). 36933965Sjdp * Otherwise, return NULL. 37033965Sjdp * Anyway, the symbol is not present after this function. 37133965Sjdp * 37233965Sjdp */ 37333965SjdpPTR /* NULL if string not in table, else */ 37433965Sjdp/* returns value of deleted symbol */ 37533965Sjdphash_delete (handle, string) 37633965Sjdp struct hash_control *handle; 37733965Sjdp const char *string; 37833965Sjdp{ 37933965Sjdp PTR retval; 38033965Sjdp struct hash_entry *entry; 38133965Sjdp 38233965Sjdp entry = hash_ask (handle, string, STAT__WRITE); 38333965Sjdp if (hash_found) 38433965Sjdp { 38533965Sjdp retval = entry->hash_value; 38633965Sjdp entry->hash_string = DELETED; 38733965Sjdp handle->hash_stat[STAT_USED] -= 1; 38833965Sjdp#ifdef SUSPECT 38933965Sjdp if (handle->hash_stat[STAT_USED] < 0) 39033965Sjdp { 39133965Sjdp error ("hash_delete"); 39233965Sjdp } 39333965Sjdp#endif /* def SUSPECT */ 39433965Sjdp } 39533965Sjdp else 39633965Sjdp { 39733965Sjdp retval = NULL; 39833965Sjdp } 39933965Sjdp return (retval); 40033965Sjdp} 40133965Sjdp 40233965Sjdp/* 40333965Sjdp * h a s h _ r e p l a c e ( ) 40433965Sjdp * 40533965Sjdp * Try to replace the old value of a symbol with a new value. 40633965Sjdp * Normally return the old value. 40733965Sjdp * Return NULL and don't change the table if the symbol is not already 40833965Sjdp * in the table. 40933965Sjdp */ 41033965SjdpPTR 41133965Sjdphash_replace (handle, string, value) 41233965Sjdp struct hash_control *handle; 41333965Sjdp const char *string; 41433965Sjdp PTR value; 41533965Sjdp{ 41633965Sjdp struct hash_entry *entry; 41733965Sjdp char *retval; 41833965Sjdp 41933965Sjdp entry = hash_ask (handle, string, STAT__WRITE); 42033965Sjdp if (hash_found) 42133965Sjdp { 42233965Sjdp retval = entry->hash_value; 42333965Sjdp entry->hash_value = value; 42433965Sjdp } 42533965Sjdp else 42633965Sjdp { 42733965Sjdp retval = NULL; 42833965Sjdp } 42933965Sjdp ; 43033965Sjdp return retval; 43133965Sjdp} 43233965Sjdp 43333965Sjdp/* 43433965Sjdp * h a s h _ i n s e r t ( ) 43533965Sjdp * 43633965Sjdp * Insert a (symbol-string, value) into the hash table. 43733965Sjdp * Return an error string, 0 means OK. 43833965Sjdp * It is an 'error' to insert an existing symbol. 43933965Sjdp */ 44033965Sjdp 44133965Sjdpconst char * /* return error string */ 44233965Sjdphash_insert (handle, string, value) 44333965Sjdp struct hash_control *handle; 44433965Sjdp const char *string; 44533965Sjdp PTR value; 44633965Sjdp{ 44733965Sjdp struct hash_entry *entry; 44833965Sjdp const char *retval; 44933965Sjdp 45033965Sjdp retval = 0; 45133965Sjdp if (handle->hash_stat[STAT_USED] > handle->hash_full) 45233965Sjdp { 45333965Sjdp retval = hash_grow (handle); 45433965Sjdp } 45533965Sjdp if (!retval) 45633965Sjdp { 45733965Sjdp entry = hash_ask (handle, string, STAT__WRITE); 45833965Sjdp if (hash_found) 45933965Sjdp { 46033965Sjdp retval = "exists"; 46133965Sjdp } 46233965Sjdp else 46333965Sjdp { 46433965Sjdp entry->hash_value = value; 46533965Sjdp entry->hash_string = string; 46633965Sjdp handle->hash_stat[STAT_USED] += 1; 46733965Sjdp } 46833965Sjdp } 46933965Sjdp return retval; 47033965Sjdp} 47133965Sjdp 47233965Sjdp/* 47333965Sjdp * h a s h _ j a m ( ) 47433965Sjdp * 47533965Sjdp * Regardless of what was in the symbol table before, after hash_jam() 47633965Sjdp * the named symbol has the given value. The symbol is either inserted or 47733965Sjdp * (its value is) replaced. 47833965Sjdp * An error message string is returned, 0 means OK. 47933965Sjdp * 48033965Sjdp * WARNING: this may decide to grow the hashed symbol table. 48133965Sjdp * To do this, we call hash_grow(), WHICH WILL recursively CALL US. 48233965Sjdp * 48333965Sjdp * We report status internally: hash_found is TRUE if we replaced, but 48433965Sjdp * false if we inserted. 48533965Sjdp */ 48633965Sjdpconst char * 48733965Sjdphash_jam (handle, string, value) 48833965Sjdp struct hash_control *handle; 48933965Sjdp const char *string; 49033965Sjdp PTR value; 49133965Sjdp{ 49233965Sjdp const char *retval; 49333965Sjdp struct hash_entry *entry; 49433965Sjdp 49533965Sjdp retval = 0; 49633965Sjdp if (handle->hash_stat[STAT_USED] > handle->hash_full) 49733965Sjdp { 49833965Sjdp retval = hash_grow (handle); 49933965Sjdp } 50033965Sjdp if (!retval) 50133965Sjdp { 50233965Sjdp entry = hash_ask (handle, string, STAT__WRITE); 50333965Sjdp if (!hash_found) 50433965Sjdp { 50533965Sjdp entry->hash_string = string; 50633965Sjdp handle->hash_stat[STAT_USED] += 1; 50733965Sjdp } 50833965Sjdp entry->hash_value = value; 50933965Sjdp } 51033965Sjdp return retval; 51133965Sjdp} 51233965Sjdp 51333965Sjdp/* 51433965Sjdp * h a s h _ g r o w ( ) 51533965Sjdp * 51633965Sjdp * Grow a new (bigger) hash table from the old one. 51733965Sjdp * We choose to double the hash table's size. 51833965Sjdp * Return a human-scrutible error string: 0 if OK. 51933965Sjdp * Warning! This uses hash_jam(), which had better not recurse 52033965Sjdp * back here! Hash_jam() conditionally calls us, but we ALWAYS 52133965Sjdp * call hash_jam()! 52233965Sjdp * Internal. 52333965Sjdp */ 52433965Sjdpstatic const char * 52533965Sjdphash_grow (handle) /* make a hash table grow */ 52633965Sjdp struct hash_control *handle; 52733965Sjdp{ 52833965Sjdp struct hash_entry *newwall; 52933965Sjdp struct hash_entry *newwhere; 53033965Sjdp struct hash_entry *newtrack; 53133965Sjdp struct hash_entry *oldtrack; 53233965Sjdp struct hash_entry *oldwhere; 53333965Sjdp struct hash_entry *oldwall; 53433965Sjdp int temp; 53533965Sjdp int newsize; 53633965Sjdp const char *string; 53733965Sjdp const char *retval; 53833965Sjdp#ifdef SUSPECT 53933965Sjdp int oldused; 54033965Sjdp#endif 54133965Sjdp 54233965Sjdp /* 54333965Sjdp * capture info about old hash table 54433965Sjdp */ 54533965Sjdp oldwhere = handle->hash_where; 54633965Sjdp oldwall = handle->hash_wall; 54733965Sjdp#ifdef SUSPECT 54833965Sjdp oldused = handle->hash_stat[STAT_USED]; 54933965Sjdp#endif 55033965Sjdp /* 55133965Sjdp * attempt to get enough room for a hash table twice as big 55233965Sjdp */ 55333965Sjdp temp = handle->hash_stat[STAT_SIZE]; 55433965Sjdp newwhere = ((struct hash_entry *) 55533965Sjdp xmalloc ((unsigned long) ((temp << (GROW_FACTOR + 1)) 55633965Sjdp /* +1 for wall slot */ 55733965Sjdp * sizeof (struct hash_entry)))); 55833965Sjdp if (newwhere == NULL) 55933965Sjdp return "no_room"; 56033965Sjdp 56133965Sjdp /* 56233965Sjdp * have enough room: now we do all the work. 56333965Sjdp * double the size of everything in handle. 56433965Sjdp */ 56533965Sjdp handle->hash_mask = ((handle->hash_mask + 1) << GROW_FACTOR) - 1; 56633965Sjdp handle->hash_stat[STAT_SIZE] <<= GROW_FACTOR; 56733965Sjdp newsize = handle->hash_stat[STAT_SIZE]; 56833965Sjdp handle->hash_where = newwhere; 56933965Sjdp handle->hash_full <<= GROW_FACTOR; 57033965Sjdp handle->hash_sizelog += GROW_FACTOR; 57133965Sjdp handle->hash_wall = newwall = newwhere + newsize; 57233965Sjdp /* Set all those pesky new slots to vacant. */ 57333965Sjdp for (newtrack = newwhere; newtrack <= newwall; newtrack++) 57433965Sjdp newtrack->hash_string = NULL; 57533965Sjdp /* We will do a scan of the old table, the hard way, using the 57633965Sjdp * new control block to re-insert the data into new hash table. */ 57733965Sjdp handle->hash_stat[STAT_USED] = 0; 57833965Sjdp for (oldtrack = oldwhere; oldtrack < oldwall; oldtrack++) 57933965Sjdp if (((string = oldtrack->hash_string) != NULL) && string != DELETED) 58033965Sjdp if ((retval = hash_jam (handle, string, oldtrack->hash_value))) 58133965Sjdp return retval; 58233965Sjdp 58333965Sjdp#ifdef SUSPECT 58433965Sjdp if (handle->hash_stat[STAT_USED] != oldused) 58533965Sjdp return "hash_used"; 58633965Sjdp#endif 58733965Sjdp 58833965Sjdp /* We have a completely faked up control block. 58933965Sjdp Return the old hash table. */ 59033965Sjdp free ((char *) oldwhere); 59133965Sjdp 59233965Sjdp return 0; 59333965Sjdp} 59433965Sjdp 59533965Sjdp#ifdef TEST 59633965Sjdp/* 59733965Sjdp * h a s h _ a p p l y ( ) 59833965Sjdp * 59933965Sjdp * Use this to scan each entry in symbol table. 60033965Sjdp * For each symbol, this calls (applys) a nominated function supplying the 60133965Sjdp * symbol's value (and the symbol's name). 60233965Sjdp * The idea is you use this to destroy whatever is associted with 60333965Sjdp * any values in the table BEFORE you destroy the table with hash_die. 60433965Sjdp * Of course, you can use it for other jobs; whenever you need to 60533965Sjdp * visit all extant symbols in the table. 60633965Sjdp * 60733965Sjdp * We choose to have a call-you-back idea for two reasons: 60833965Sjdp * asthetic: it is a neater idea to use apply than an explicit loop 60933965Sjdp * sensible: if we ever had to grow the symbol table (due to insertions) 61033965Sjdp * then we would lose our place in the table when we re-hashed 61133965Sjdp * symbols into the new table in a different order. 61233965Sjdp * 61333965Sjdp * The order symbols are visited depends entirely on the hashing function. 61433965Sjdp * Whenever you insert a (symbol, value) you risk expanding the table. If 61533965Sjdp * you do expand the table, then the hashing function WILL change, so you 61633965Sjdp * MIGHT get a different order of symbols visited. In other words, if you 61733965Sjdp * want the same order of visiting symbols as the last time you used 61833965Sjdp * hash_apply() then you better not have done any hash_insert()s or 61933965Sjdp * hash_jam()s since the last time you used hash_apply(). 62033965Sjdp * 62133965Sjdp * In future we may use the value returned by your nominated function. 62233965Sjdp * One idea is to abort the scan if, after applying the function to a 62333965Sjdp * certain node, the function returns a certain code. 62433965Sjdp * 62533965Sjdp * The function you supply should be of the form: 62633965Sjdp * void myfunct(string,value) 62733965Sjdp * char * string; |* the symbol's name *| 62833965Sjdp * char * value; |* the symbol's value *| 62933965Sjdp * { 63033965Sjdp * |* ... *| 63133965Sjdp * } 63233965Sjdp * 63333965Sjdp */ 63433965Sjdpvoid 63533965Sjdphash_apply (handle, function) 63633965Sjdp struct hash_control *handle; 63733965Sjdp void (*function) (); 63833965Sjdp{ 63933965Sjdp struct hash_entry *entry; 64033965Sjdp struct hash_entry *wall; 64133965Sjdp 64233965Sjdp wall = handle->hash_wall; 64333965Sjdp for (entry = handle->hash_where; entry < wall; entry++) 64433965Sjdp { 64533965Sjdp if (islive (entry)) /* silly code: tests entry->string twice! */ 64633965Sjdp { 64733965Sjdp (*function) (entry->hash_string, entry->hash_value); 64833965Sjdp } 64933965Sjdp } 65033965Sjdp} 65133965Sjdp#endif 65233965Sjdp 65333965Sjdp/* 65433965Sjdp * h a s h _ f i n d ( ) 65533965Sjdp * 65633965Sjdp * Given symbol string, find value (if any). 65733965Sjdp * Return found value or NULL. 65833965Sjdp */ 65933965SjdpPTR 66033965Sjdphash_find (handle, string) 66133965Sjdp struct hash_control *handle; 66233965Sjdp const char *string; 66333965Sjdp{ 66433965Sjdp struct hash_entry *entry; 66533965Sjdp 66633965Sjdp entry = hash_ask (handle, string, STAT__READ); 66733965Sjdp if (hash_found) 66833965Sjdp return entry->hash_value; 66933965Sjdp else 67033965Sjdp return NULL; 67133965Sjdp} 67233965Sjdp 67333965Sjdp/* 67433965Sjdp * h a s h _ a s k ( ) 67533965Sjdp * 67633965Sjdp * Searches for given symbol string. 67733965Sjdp * Return the slot where it OUGHT to live. It may be there. 67833965Sjdp * Return hash_found: TRUE only if symbol is in that slot. 67933965Sjdp * Access argument is to help keep statistics in control block. 68033965Sjdp * Internal. 68133965Sjdp */ 68233965Sjdpstatic struct hash_entry * /* string slot, may be empty or deleted */ 68333965Sjdphash_ask (handle, string, access_type) 68433965Sjdp struct hash_control *handle; 68533965Sjdp const char *string; 68633965Sjdp int access_type; 68733965Sjdp{ 68833965Sjdp const char *s; 68933965Sjdp struct hash_entry *slot; 69033965Sjdp int collision; /* count collisions */ 69133965Sjdp int strcmps; 69233965Sjdp int hcode; 69333965Sjdp 69433965Sjdp /* start looking here */ 69533965Sjdp hcode = hash_code (handle, string); 69633965Sjdp slot = handle->hash_where + (hcode & handle->hash_mask); 69733965Sjdp 69833965Sjdp handle->hash_stat[STAT_ACCESS + access_type] += 1; 69933965Sjdp collision = strcmps = 0; 70033965Sjdp hash_found = FALSE; 70133965Sjdp while (((s = slot->hash_string) != NULL) && s != DELETED) 70233965Sjdp { 70333965Sjdp if (string == s) 70433965Sjdp { 70533965Sjdp hash_found = TRUE; 70633965Sjdp break; 70733965Sjdp } 70833965Sjdp if (slot->h == hcode) 70933965Sjdp { 71033965Sjdp if (!strcmp (string, s)) 71133965Sjdp { 71233965Sjdp hash_found = TRUE; 71333965Sjdp break; 71433965Sjdp } 71533965Sjdp strcmps++; 71633965Sjdp } 71733965Sjdp collision++; 71833965Sjdp slot++; 71933965Sjdp } 72033965Sjdp /* 72133965Sjdp * slot: return: 72233965Sjdp * in use: we found string slot 72333965Sjdp * at empty: 72433965Sjdp * at wall: we fell off: wrap round ???? 72533965Sjdp * in table: dig here slot 72633965Sjdp * at DELETED: dig here slot 72733965Sjdp */ 72833965Sjdp if (slot == handle->hash_wall) 72933965Sjdp { 73033965Sjdp slot = handle->hash_where;/* now look again */ 73133965Sjdp while (((s = slot->hash_string) != NULL) && s != DELETED) 73233965Sjdp { 73333965Sjdp if (string == s) 73433965Sjdp { 73533965Sjdp hash_found = TRUE; 73633965Sjdp break; 73733965Sjdp } 73833965Sjdp if (slot->h == hcode) 73933965Sjdp { 74033965Sjdp if (!strcmp (string, s)) 74133965Sjdp { 74233965Sjdp hash_found = TRUE; 74333965Sjdp break; 74433965Sjdp } 74533965Sjdp strcmps++; 74633965Sjdp } 74733965Sjdp collision++; 74833965Sjdp slot++; 74933965Sjdp } 75033965Sjdp /* 75133965Sjdp * slot: return: 75233965Sjdp * in use: we found it slot 75333965Sjdp * empty: wall: ERROR IMPOSSIBLE !!!! 75433965Sjdp * in table: dig here slot 75533965Sjdp * DELETED:dig here slot 75633965Sjdp */ 75733965Sjdp } 75833965Sjdp handle->hash_stat[STAT_COLLIDE + access_type] += collision; 75933965Sjdp handle->hash_stat[STAT_STRCMP + access_type] += strcmps; 76033965Sjdp if (!hash_found) 76133965Sjdp slot->h = hcode; 76233965Sjdp return slot; /* also return hash_found */ 76333965Sjdp} 76433965Sjdp 76533965Sjdp/* 76633965Sjdp * h a s h _ c o d e 76733965Sjdp * 76833965Sjdp * Does hashing of symbol string to hash number. 76933965Sjdp * Internal. 77033965Sjdp */ 77133965Sjdpstatic int 77233965Sjdphash_code (handle, string) 77333965Sjdp struct hash_control *handle; 77433965Sjdp const char *string; 77533965Sjdp{ 77633965Sjdp#if 1 /* There seems to be some interesting property of this function 77733965Sjdp that prevents the bfd version below from being an adequate 77833965Sjdp substitute. @@ Figure out what this property is! */ 77933965Sjdp long h; /* hash code built here */ 78033965Sjdp long c; /* each character lands here */ 78133965Sjdp int n; /* Amount to shift h by */ 78233965Sjdp 78333965Sjdp n = (handle->hash_sizelog - 3); 78433965Sjdp h = 0; 78533965Sjdp while ((c = *string++) != 0) 78633965Sjdp { 78733965Sjdp h += c; 78833965Sjdp h = (h << 3) + (h >> n) + c; 78933965Sjdp } 79033965Sjdp return h; 79133965Sjdp#else 79233965Sjdp /* from bfd */ 79333965Sjdp unsigned long h = 0; 79433965Sjdp unsigned int len = 0; 79533965Sjdp unsigned int c; 79633965Sjdp 79733965Sjdp while ((c = *string++) != 0) 79833965Sjdp { 79933965Sjdp h += c + (c << 17); 80033965Sjdp h ^= h >> 2; 80133965Sjdp ++len; 80233965Sjdp } 80333965Sjdp h += len + (len << 17); 80433965Sjdp h ^= h >> 2; 80533965Sjdp return h; 80633965Sjdp#endif 80733965Sjdp} 80833965Sjdp 80933965Sjdpvoid 81033965Sjdphash_print_statistics (file, name, h) 81133965Sjdp FILE *file; 81233965Sjdp const char *name; 81333965Sjdp struct hash_control *h; 81433965Sjdp{ 81533965Sjdp unsigned long sz, used, pct; 81633965Sjdp 81733965Sjdp if (h == 0) 81833965Sjdp return; 81933965Sjdp 82033965Sjdp sz = h->hash_stat[STAT_SIZE]; 82133965Sjdp used = h->hash_stat[STAT_USED]; 82233965Sjdp pct = (used * 100 + sz / 2) / sz; 82333965Sjdp 82433965Sjdp fprintf (file, "%s hash statistics:\n\t%lu/%lu slots used (%lu%%)\n", 82533965Sjdp name, used, sz, pct); 82633965Sjdp 82733965Sjdp#define P(name, off) \ 82833965Sjdp fprintf (file, "\t%-16s %6dr + %6dw = %7d\n", name, \ 82933965Sjdp h->hash_stat[off+STAT__READ], \ 83033965Sjdp h->hash_stat[off+STAT__WRITE], \ 83133965Sjdp h->hash_stat[off+STAT__READ] + h->hash_stat[off+STAT__WRITE]) 83233965Sjdp 83333965Sjdp P ("accesses:", STAT_ACCESS); 83433965Sjdp P ("collisions:", STAT_COLLIDE); 83533965Sjdp P ("string compares:", STAT_STRCMP); 83633965Sjdp 83733965Sjdp#undef P 83833965Sjdp} 83933965Sjdp 84033965Sjdp/* 84133965Sjdp * Here is a test program to exercise above. 84233965Sjdp */ 84333965Sjdp#ifdef TEST 84433965Sjdp 84533965Sjdp#define TABLES (6) /* number of hash tables to maintain */ 84633965Sjdp/* (at once) in any testing */ 84733965Sjdp#define STATBUFSIZE (12) /* we can have 12 statistics */ 84833965Sjdp 84933965Sjdpint statbuf[STATBUFSIZE]; /* display statistics here */ 85033965Sjdpchar answer[100]; /* human farts here */ 85133965Sjdpchar *hashtable[TABLES]; /* we test many hash tables at once */ 85233965Sjdpchar *h; /* points to curent hash_control */ 85333965Sjdpchar **pp; 85433965Sjdpchar *p; 85533965Sjdpchar *name; 85633965Sjdpchar *value; 85733965Sjdpint size; 85833965Sjdpint used; 85933965Sjdpchar command; 86033965Sjdpint number; /* number 0:TABLES-1 of current hashed */ 86133965Sjdp/* symbol table */ 86233965Sjdp 86333965Sjdpmain () 86433965Sjdp{ 86533965Sjdp void applicatee (); 86633965Sjdp void destroy (); 86733965Sjdp char *what (); 86833965Sjdp int *ip; 86933965Sjdp 87033965Sjdp number = 0; 87133965Sjdp h = 0; 87233965Sjdp printf ("type h <RETURN> for help\n"); 87333965Sjdp for (;;) 87433965Sjdp { 87533965Sjdp printf ("hash_test command: "); 87633965Sjdp gets (answer); 87733965Sjdp command = answer[0]; 87833965Sjdp if (isupper (command)) 87933965Sjdp command = tolower (command); /* ecch! */ 88033965Sjdp switch (command) 88133965Sjdp { 88233965Sjdp case '#': 88333965Sjdp printf ("old hash table #=%d.\n", number); 88433965Sjdp whattable (); 88533965Sjdp break; 88633965Sjdp case '?': 88733965Sjdp for (pp = hashtable; pp < hashtable + TABLES; pp++) 88833965Sjdp { 88933965Sjdp printf ("address of hash table #%d control block is %xx\n" 89033965Sjdp ,pp - hashtable, *pp); 89133965Sjdp } 89233965Sjdp break; 89333965Sjdp case 'a': 89433965Sjdp hash_apply (h, applicatee); 89533965Sjdp break; 89633965Sjdp case 'd': 89733965Sjdp hash_apply (h, destroy); 89833965Sjdp hash_die (h); 89933965Sjdp break; 90033965Sjdp case 'f': 90133965Sjdp p = hash_find (h, name = what ("symbol")); 90233965Sjdp printf ("value of \"%s\" is \"%s\"\n", name, p ? p : "NOT-PRESENT"); 90333965Sjdp break; 90433965Sjdp case 'h': 90533965Sjdp printf ("# show old, select new default hash table number\n"); 90633965Sjdp printf ("? display all hashtable control block addresses\n"); 90733965Sjdp printf ("a apply a simple display-er to each symbol in table\n"); 90833965Sjdp printf ("d die: destroy hashtable\n"); 90933965Sjdp printf ("f find value of nominated symbol\n"); 91033965Sjdp printf ("h this help\n"); 91133965Sjdp printf ("i insert value into symbol\n"); 91233965Sjdp printf ("j jam value into symbol\n"); 91333965Sjdp printf ("n new hashtable\n"); 91433965Sjdp printf ("r replace a value with another\n"); 91533965Sjdp printf ("s say what %% of table is used\n"); 91633965Sjdp printf ("q exit this program\n"); 91733965Sjdp printf ("x delete a symbol from table, report its value\n"); 91833965Sjdp break; 91933965Sjdp case 'i': 92033965Sjdp p = hash_insert (h, name = what ("symbol"), value = what ("value")); 92133965Sjdp if (p) 92233965Sjdp { 92333965Sjdp printf ("symbol=\"%s\" value=\"%s\" error=%s\n", name, value, 92433965Sjdp p); 92533965Sjdp } 92633965Sjdp break; 92733965Sjdp case 'j': 92833965Sjdp p = hash_jam (h, name = what ("symbol"), value = what ("value")); 92933965Sjdp if (p) 93033965Sjdp { 93133965Sjdp printf ("symbol=\"%s\" value=\"%s\" error=%s\n", name, value, p); 93233965Sjdp } 93333965Sjdp break; 93433965Sjdp case 'n': 93533965Sjdp h = hashtable[number] = (char *) hash_new (); 93633965Sjdp break; 93733965Sjdp case 'q': 93833965Sjdp exit (EXIT_SUCCESS); 93933965Sjdp case 'r': 94033965Sjdp p = hash_replace (h, name = what ("symbol"), value = what ("value")); 94133965Sjdp printf ("old value was \"%s\"\n", p ? p : "{}"); 94233965Sjdp break; 94333965Sjdp case 's': 94433965Sjdp hash_say (h, statbuf, STATBUFSIZE); 94533965Sjdp for (ip = statbuf; ip < statbuf + STATBUFSIZE; ip++) 94633965Sjdp { 94733965Sjdp printf ("%d ", *ip); 94833965Sjdp } 94933965Sjdp printf ("\n"); 95033965Sjdp break; 95133965Sjdp case 'x': 95233965Sjdp p = hash_delete (h, name = what ("symbol")); 95333965Sjdp printf ("old value was \"%s\"\n", p ? p : "{}"); 95433965Sjdp break; 95533965Sjdp default: 95633965Sjdp printf ("I can't understand command \"%c\"\n", command); 95733965Sjdp break; 95833965Sjdp } 95933965Sjdp } 96033965Sjdp} 96133965Sjdp 96233965Sjdpchar * 96333965Sjdpwhat (description) 96433965Sjdp char *description; 96533965Sjdp{ 96633965Sjdp char *retval; 96733965Sjdp char *malloc (); 96833965Sjdp 96933965Sjdp printf (" %s : ", description); 97033965Sjdp gets (answer); 97133965Sjdp /* will one day clean up answer here */ 97233965Sjdp retval = malloc (strlen (answer) + 1); 97333965Sjdp if (!retval) 97433965Sjdp { 97533965Sjdp error ("room"); 97633965Sjdp } 97733965Sjdp (void) strcpy (retval, answer); 97833965Sjdp return (retval); 97933965Sjdp} 98033965Sjdp 98133965Sjdpvoid 98233965Sjdpdestroy (string, value) 98333965Sjdp char *string; 98433965Sjdp char *value; 98533965Sjdp{ 98633965Sjdp free (string); 98733965Sjdp free (value); 98833965Sjdp} 98933965Sjdp 99033965Sjdp 99133965Sjdpvoid 99233965Sjdpapplicatee (string, value) 99333965Sjdp char *string; 99433965Sjdp char *value; 99533965Sjdp{ 99633965Sjdp printf ("%.20s-%.20s\n", string, value); 99733965Sjdp} 99833965Sjdp 99933965Sjdpwhattable () /* determine number: what hash table to use */ 100033965Sjdp /* also determine h: points to hash_control */ 100133965Sjdp{ 100233965Sjdp 100333965Sjdp for (;;) 100433965Sjdp { 100533965Sjdp printf (" what hash table (%d:%d) ? ", 0, TABLES - 1); 100633965Sjdp gets (answer); 100733965Sjdp sscanf (answer, "%d", &number); 100833965Sjdp if (number >= 0 && number < TABLES) 100933965Sjdp { 101033965Sjdp h = hashtable[number]; 101133965Sjdp if (!h) 101233965Sjdp { 101333965Sjdp printf ("warning: current hash-table-#%d. has no hash-control\n", number); 101433965Sjdp } 101533965Sjdp return; 101633965Sjdp } 101733965Sjdp else 101833965Sjdp { 101933965Sjdp printf ("invalid hash table number: %d\n", number); 102033965Sjdp } 102133965Sjdp } 102233965Sjdp} 102333965Sjdp 102433965Sjdp 102533965Sjdp 102633965Sjdp#endif /* #ifdef TEST */ 102733965Sjdp 102833965Sjdp/* end of hash.c */ 1029