hash.c revision 3229
13229Spst/************************************************************************ 23229Spst Copyright 1988, 1991 by Carnegie Mellon University 33229Spst 43229Spst All Rights Reserved 53229Spst 63229SpstPermission to use, copy, modify, and distribute this software and its 73229Spstdocumentation for any purpose and without fee is hereby granted, provided 83229Spstthat the above copyright notice appear in all copies and that both that 93229Spstcopyright notice and this permission notice appear in supporting 103229Spstdocumentation, and that the name of Carnegie Mellon University not be used 113229Spstin advertising or publicity pertaining to distribution of the software 123229Spstwithout specific, written prior permission. 133229Spst 143229SpstCARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS 153229SpstSOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 163229SpstIN NO EVENT SHALL CMU BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL 173229SpstDAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR 183229SpstPROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 193229SpstACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 203229SpstSOFTWARE. 213229Spst************************************************************************/ 223229Spst 233229Spst#ifndef lint 243229Spststatic char rcsid[] = "$Id: hash.c,v 1.1.1.1 1994/09/10 14:44:55 csgr Exp $"; 253229Spst#endif 263229Spst 273229Spst 283229Spst/* 293229Spst * Generalized hash table ADT 303229Spst * 313229Spst * Provides multiple, dynamically-allocated, variable-sized hash tables on 323229Spst * various data and keys. 333229Spst * 343229Spst * This package attempts to follow some of the coding conventions suggested 353229Spst * by Bob Sidebotham and the AFS Clean Code Committee of the 363229Spst * Information Technology Center at Carnegie Mellon. 373229Spst */ 383229Spst 393229Spst 403229Spst#include <sys/types.h> 413229Spst#include <stdlib.h> 423229Spst 433229Spst#ifndef USE_BFUNCS 443229Spst#include <memory.h> 453229Spst/* Yes, memcpy is OK here (no overlapped copies). */ 463229Spst#define bcopy(a,b,c) memcpy(b,a,c) 473229Spst#define bzero(p,l) memset(p,0,l) 483229Spst#define bcmp(a,b,c) memcmp(a,b,c) 493229Spst#endif 503229Spst 513229Spst#include "hash.h" 523229Spst 533229Spst#define TRUE 1 543229Spst#define FALSE 0 553229Spst#ifndef NULL 563229Spst#define NULL 0 573229Spst#endif 583229Spst 593229Spst/* 603229Spst * This can be changed to make internal routines visible to debuggers, etc. 613229Spst */ 623229Spst#ifndef PRIVATE 633229Spst#define PRIVATE static 643229Spst#endif 653229Spst 663229Spst#ifdef __STDC__ 673229Spst#define P(args) args 683229Spst#else 693229Spst#define P(args) () 703229Spst#endif 713229Spst 723229SpstPRIVATE void hashi_FreeMembers P((hash_member *, hash_freefp)); 733229Spst 743229Spst#undef P 753229Spst 763229Spst 773229Spst 783229Spst/* 793229Spst * Hash table initialization routine. 803229Spst * 813229Spst * This routine creates and intializes a hash table of size "tablesize" 823229Spst * entries. Successful calls return a pointer to the hash table (which must 833229Spst * be passed to other hash routines to identify the hash table). Failed 843229Spst * calls return NULL. 853229Spst */ 863229Spst 873229Spsthash_tbl * 883229Spsthash_Init(tablesize) 893229Spst unsigned tablesize; 903229Spst{ 913229Spst register hash_tbl *hashtblptr; 923229Spst register unsigned totalsize; 933229Spst 943229Spst if (tablesize > 0) { 953229Spst totalsize = sizeof(hash_tbl) 963229Spst + sizeof(hash_member *) * (tablesize - 1); 973229Spst hashtblptr = (hash_tbl *) malloc(totalsize); 983229Spst if (hashtblptr) { 993229Spst bzero((char *) hashtblptr, totalsize); 1003229Spst hashtblptr->size = tablesize; /* Success! */ 1013229Spst hashtblptr->bucketnum = 0; 1023229Spst hashtblptr->member = (hashtblptr->table)[0]; 1033229Spst } 1043229Spst } else { 1053229Spst hashtblptr = NULL; /* Disallow zero-length tables */ 1063229Spst } 1073229Spst return hashtblptr; /* NULL if failure */ 1083229Spst} 1093229Spst 1103229Spst 1113229Spst 1123229Spst/* 1133229Spst * Frees an entire linked list of bucket members (used in the open 1143229Spst * hashing scheme). Does nothing if the passed pointer is NULL. 1153229Spst */ 1163229Spst 1173229SpstPRIVATE void 1183229Spsthashi_FreeMembers(bucketptr, free_data) 1193229Spst hash_member *bucketptr; 1203229Spst hash_freefp free_data; 1213229Spst{ 1223229Spst hash_member *nextbucket; 1233229Spst while (bucketptr) { 1243229Spst nextbucket = bucketptr->next; 1253229Spst (*free_data) (bucketptr->data); 1263229Spst free((char *) bucketptr); 1273229Spst bucketptr = nextbucket; 1283229Spst } 1293229Spst} 1303229Spst 1313229Spst 1323229Spst 1333229Spst 1343229Spst/* 1353229Spst * This routine re-initializes the hash table. It frees all the allocated 1363229Spst * memory and resets all bucket pointers to NULL. 1373229Spst */ 1383229Spst 1393229Spstvoid 1403229Spsthash_Reset(hashtable, free_data) 1413229Spst hash_tbl *hashtable; 1423229Spst hash_freefp free_data; 1433229Spst{ 1443229Spst hash_member **bucketptr; 1453229Spst unsigned i; 1463229Spst 1473229Spst bucketptr = hashtable->table; 1483229Spst for (i = 0; i < hashtable->size; i++) { 1493229Spst hashi_FreeMembers(*bucketptr, free_data); 1503229Spst *bucketptr++ = NULL; 1513229Spst } 1523229Spst hashtable->bucketnum = 0; 1533229Spst hashtable->member = (hashtable->table)[0]; 1543229Spst} 1553229Spst 1563229Spst 1573229Spst 1583229Spst/* 1593229Spst * Generic hash function to calculate a hash code from the given string. 1603229Spst * 1613229Spst * For each byte of the string, this function left-shifts the value in an 1623229Spst * accumulator and then adds the byte into the accumulator. The contents of 1633229Spst * the accumulator is returned after the entire string has been processed. 1643229Spst * It is assumed that this result will be used as the "hashcode" parameter in 1653229Spst * calls to other functions in this package. These functions automatically 1663229Spst * adjust the hashcode for the size of each hashtable. 1673229Spst * 1683229Spst * This algorithm probably works best when the hash table size is a prime 1693229Spst * number. 1703229Spst * 1713229Spst * Hopefully, this function is better than the previous one which returned 1723229Spst * the sum of the squares of all the bytes. I'm still open to other 1733229Spst * suggestions for a default hash function. The programmer is more than 1743229Spst * welcome to supply his/her own hash function as that is one of the design 1753229Spst * features of this package. 1763229Spst */ 1773229Spst 1783229Spstunsigned 1793229Spsthash_HashFunction(string, len) 1803229Spst unsigned char *string; 1813229Spst register unsigned len; 1823229Spst{ 1833229Spst register unsigned accum; 1843229Spst 1853229Spst accum = 0; 1863229Spst for (; len > 0; len--) { 1873229Spst accum <<= 1; 1883229Spst accum += (unsigned) (*string++ & 0xFF); 1893229Spst } 1903229Spst return accum; 1913229Spst} 1923229Spst 1933229Spst 1943229Spst 1953229Spst/* 1963229Spst * Returns TRUE if at least one entry for the given key exists; FALSE 1973229Spst * otherwise. 1983229Spst */ 1993229Spst 2003229Spstint 2013229Spsthash_Exists(hashtable, hashcode, compare, key) 2023229Spst hash_tbl *hashtable; 2033229Spst unsigned hashcode; 2043229Spst hash_cmpfp compare; 2053229Spst hash_datum *key; 2063229Spst{ 2073229Spst register hash_member *memberptr; 2083229Spst 2093229Spst memberptr = (hashtable->table)[hashcode % (hashtable->size)]; 2103229Spst while (memberptr) { 2113229Spst if ((*compare) (key, memberptr->data)) { 2123229Spst return TRUE; /* Entry does exist */ 2133229Spst } 2143229Spst memberptr = memberptr->next; 2153229Spst } 2163229Spst return FALSE; /* Entry does not exist */ 2173229Spst} 2183229Spst 2193229Spst 2203229Spst 2213229Spst/* 2223229Spst * Insert the data item "element" into the hash table using "hashcode" 2233229Spst * to determine the bucket number, and "compare" and "key" to determine 2243229Spst * its uniqueness. 2253229Spst * 2263229Spst * If the insertion is successful 0 is returned. If a matching entry 2273229Spst * already exists in the given bucket of the hash table, or some other error 2283229Spst * occurs, -1 is returned and the insertion is not done. 2293229Spst */ 2303229Spst 2313229Spstint 2323229Spsthash_Insert(hashtable, hashcode, compare, key, element) 2333229Spst hash_tbl *hashtable; 2343229Spst unsigned hashcode; 2353229Spst hash_cmpfp compare; 2363229Spst hash_datum *key, *element; 2373229Spst{ 2383229Spst hash_member *temp; 2393229Spst 2403229Spst hashcode %= hashtable->size; 2413229Spst if (hash_Exists(hashtable, hashcode, compare, key)) { 2423229Spst return -1; /* At least one entry already exists */ 2433229Spst } 2443229Spst temp = (hash_member *) malloc(sizeof(hash_member)); 2453229Spst if (!temp) 2463229Spst return -1; /* malloc failed! */ 2473229Spst 2483229Spst temp->data = element; 2493229Spst temp->next = (hashtable->table)[hashcode]; 2503229Spst (hashtable->table)[hashcode] = temp; 2513229Spst return 0; /* Success */ 2523229Spst} 2533229Spst 2543229Spst 2553229Spst 2563229Spst/* 2573229Spst * Delete all data elements which match the given key. If at least one 2583229Spst * element is found and the deletion is successful, 0 is returned. 2593229Spst * If no matching elements can be found in the hash table, -1 is returned. 2603229Spst */ 2613229Spst 2623229Spstint 2633229Spsthash_Delete(hashtable, hashcode, compare, key, free_data) 2643229Spst hash_tbl *hashtable; 2653229Spst unsigned hashcode; 2663229Spst hash_cmpfp compare; 2673229Spst hash_datum *key; 2683229Spst hash_freefp free_data; 2693229Spst{ 2703229Spst hash_member *memberptr, *tempptr; 2713229Spst hash_member *previous = NULL; 2723229Spst int retval; 2733229Spst 2743229Spst retval = -1; 2753229Spst hashcode %= hashtable->size; 2763229Spst 2773229Spst /* 2783229Spst * Delete the first member of the list if it matches. Since this moves 2793229Spst * the second member into the first position we have to keep doing this 2803229Spst * over and over until it no longer matches. 2813229Spst */ 2823229Spst memberptr = (hashtable->table)[hashcode]; 2833229Spst while (memberptr && (*compare) (key, memberptr->data)) { 2843229Spst (hashtable->table)[hashcode] = memberptr->next; 2853229Spst /* 2863229Spst * Stop hashi_FreeMembers() from deleting the whole list! 2873229Spst */ 2883229Spst memberptr->next = NULL; 2893229Spst hashi_FreeMembers(memberptr, free_data); 2903229Spst memberptr = (hashtable->table)[hashcode]; 2913229Spst retval = 0; 2923229Spst } 2933229Spst 2943229Spst /* 2953229Spst * Now traverse the rest of the list 2963229Spst */ 2973229Spst if (memberptr) { 2983229Spst previous = memberptr; 2993229Spst memberptr = memberptr->next; 3003229Spst } 3013229Spst while (memberptr) { 3023229Spst if ((*compare) (key, memberptr->data)) { 3033229Spst tempptr = memberptr; 3043229Spst previous->next = memberptr = memberptr->next; 3053229Spst /* 3063229Spst * Put the brakes on hashi_FreeMembers(). . . . 3073229Spst */ 3083229Spst tempptr->next = NULL; 3093229Spst hashi_FreeMembers(tempptr, free_data); 3103229Spst retval = 0; 3113229Spst } else { 3123229Spst previous = memberptr; 3133229Spst memberptr = memberptr->next; 3143229Spst } 3153229Spst } 3163229Spst return retval; 3173229Spst} 3183229Spst 3193229Spst 3203229Spst 3213229Spst/* 3223229Spst * Locate and return the data entry associated with the given key. 3233229Spst * 3243229Spst * If the data entry is found, a pointer to it is returned. Otherwise, 3253229Spst * NULL is returned. 3263229Spst */ 3273229Spst 3283229Spsthash_datum * 3293229Spsthash_Lookup(hashtable, hashcode, compare, key) 3303229Spst hash_tbl *hashtable; 3313229Spst unsigned hashcode; 3323229Spst hash_cmpfp compare; 3333229Spst hash_datum *key; 3343229Spst{ 3353229Spst hash_member *memberptr; 3363229Spst 3373229Spst memberptr = (hashtable->table)[hashcode % (hashtable->size)]; 3383229Spst while (memberptr) { 3393229Spst if ((*compare) (key, memberptr->data)) { 3403229Spst return (memberptr->data); 3413229Spst } 3423229Spst memberptr = memberptr->next; 3433229Spst } 3443229Spst return NULL; 3453229Spst} 3463229Spst 3473229Spst 3483229Spst 3493229Spst/* 3503229Spst * Return the next available entry in the hashtable for a linear search 3513229Spst */ 3523229Spst 3533229Spsthash_datum * 3543229Spsthash_NextEntry(hashtable) 3553229Spst hash_tbl *hashtable; 3563229Spst{ 3573229Spst register unsigned bucket; 3583229Spst register hash_member *memberptr; 3593229Spst 3603229Spst /* 3613229Spst * First try to pick up where we left off. 3623229Spst */ 3633229Spst memberptr = hashtable->member; 3643229Spst if (memberptr) { 3653229Spst hashtable->member = memberptr->next; /* Set up for next call */ 3663229Spst return memberptr->data; /* Return the data */ 3673229Spst } 3683229Spst /* 3693229Spst * We hit the end of a chain, so look through the array of buckets 3703229Spst * until we find a new chain (non-empty bucket) or run out of buckets. 3713229Spst */ 3723229Spst bucket = hashtable->bucketnum + 1; 3733229Spst while ((bucket < hashtable->size) && 3743229Spst !(memberptr = (hashtable->table)[bucket])) { 3753229Spst bucket++; 3763229Spst } 3773229Spst 3783229Spst /* 3793229Spst * Check to see if we ran out of buckets. 3803229Spst */ 3813229Spst if (bucket >= hashtable->size) { 3823229Spst /* 3833229Spst * Reset to top of table for next call. 3843229Spst */ 3853229Spst hashtable->bucketnum = 0; 3863229Spst hashtable->member = (hashtable->table)[0]; 3873229Spst /* 3883229Spst * But return end-of-table indication to the caller this time. 3893229Spst */ 3903229Spst return NULL; 3913229Spst } 3923229Spst /* 3933229Spst * Must have found a non-empty bucket. 3943229Spst */ 3953229Spst hashtable->bucketnum = bucket; 3963229Spst hashtable->member = memberptr->next; /* Set up for next call */ 3973229Spst return memberptr->data; /* Return the data */ 3983229Spst} 3993229Spst 4003229Spst 4013229Spst 4023229Spst/* 4033229Spst * Return the first entry in a hash table for a linear search 4043229Spst */ 4053229Spst 4063229Spsthash_datum * 4073229Spsthash_FirstEntry(hashtable) 4083229Spst hash_tbl *hashtable; 4093229Spst{ 4103229Spst hashtable->bucketnum = 0; 4113229Spst hashtable->member = (hashtable->table)[0]; 4123229Spst return hash_NextEntry(hashtable); 4133229Spst} 4143229Spst 4153229Spst/* 4163229Spst * Local Variables: 4173229Spst * tab-width: 4 4183229Spst * c-indent-level: 4 4193229Spst * c-argdecl-indent: 4 4203229Spst * c-continued-statement-offset: 4 4213229Spst * c-continued-brace-offset: -4 4223229Spst * c-label-offset: -4 4233229Spst * c-brace-offset: 0 4243229Spst * End: 4253229Spst */ 426