1/* 2 * 3 * Copyright 1990 4 * Terry Jones & Jordan Hubbard 5 * 6 * PCS Computer Systeme, GmbH. 7 * Munich, West Germany 8 * 9 * 10 * All rights reserved. 11 * 12 * This is unsupported software and is subject to change without notice. 13 * the author makes no representations about the suitability of this software 14 * for any purpose. It is supplied "as is" without express or implied 15 * warranty. 16 * 17 * Permission to use, copy, modify, and distribute this software and its 18 * documentation for any purpose and without fee is hereby granted, provided 19 * that the above copyright notice appear in all copies and that both that 20 * copyright notice and this permission notice appear in supporting 21 * documentation, and that the name of the author not be used in 22 * advertising or publicity pertaining to distribution of the software 23 * without specific, written prior permission. 24 * 25 */ 26 27/* 28 * This is a fairly simple open addressing hash scheme. 29 * Terry did all the code, I just did the spec. 30 * Thanks again, you crazy Aussie.. 31 * 32 */ 33 34/* 35 * $Log: strhash.c,v $ 36 * Revision 2.0 90/03/26 01:44:26 jkh 37 * pre-beta check-in 38 * 39 * Revision 1.8 90/03/09 19:22:35 jkh 40 * Fixed bogus comment. 41 * 42 * Revision 1.7 90/03/09 19:01:08 jkh 43 * Added comments, GPL. 44 * 45 * Revision 1.6 90/03/08 17:55:58 terry 46 * Rearranged hash_purge to be a tiny bit more efficient. 47 * Added verbose option to hash_stats. 48 * 49 * Revision 1.5 90/03/08 17:19:54 terry 50 * Added hash_purge. Added arg to hash_traverse. Changed all 51 * void * to Generic. 52 * 53 * Revision 1.4 90/03/08 12:02:35 terry 54 * Fixed problems with allocation that I screwed up last night. 55 * Changed bucket lists to be singly linked. Thanks to JKH, my hero. 56 * 57 * Revision 1.3 90/03/07 21:33:33 terry 58 * Cleaned up a few decls to keep gcc -Wall quiet. 59 * 60 * Revision 1.2 90/03/07 21:14:53 terry 61 * Comments. Added HASH_STATS define. Removed hash_find() 62 * and new_node(). 63 * 64 * Revision 1.1 90/03/07 20:49:45 terry 65 * Initial revision 66 * 67 * 68 */ 69 70#include <sys/cdefs.h> 71__FBSDID("$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.10 2002/03/22 21:53:10 obrien Exp $"); 72 73#include <stdio.h> 74#include <stdlib.h> 75#include <string.h> 76#include <sys/types.h> 77#include <strhash.h> 78 79#define HASH_NULL (hash_table *)0 80#define NODE_NULL (hash_node *)0 81#define GENERIC_NULL (void *)0 82 83#define HASH_SZ 97 84 85 86static int _hash(int size, char *key); 87static hash_node *list_find(caddr_t key, hash_node *head); 88static int assign_key(char *key, hash_node *node); 89 90 91/* 92 * hash_create() 93 * 94 * Malloc room for a new hash table and then room for its 95 * bucket pointers. Then set all the buckets to 96 * point to 0. Return the address of the new table. 97 */ 98hash_table * 99hash_create(int size) 100{ 101 int i; 102 hash_table *new = (hash_table *)malloc(sizeof(hash_table)); 103 104 if (!new || size < 0){ 105 return HASH_NULL; 106 } 107 108 if (size == 0){ 109 size = HASH_SZ; 110 } 111 112 if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){ 113 return HASH_NULL; 114 } 115 116 for (i = 0; i < size; i++){ 117 new->buckets[i] = NODE_NULL; 118 } 119 new->size = size; 120 121 return new; 122} 123 124 125/* 126 * list_find() 127 * 128 * Find the key in the linked list pointed to by head. 129 */ 130static hash_node * 131list_find(caddr_t key, hash_node *head) 132{ 133 while (head){ 134 if (!strcmp(head->key, key)){ 135 return head; 136 } 137 head = head->next; 138 } 139 return NODE_NULL; 140} 141 142 143/* 144 * _hash() 145 * 146 * Compute the hash value for the given key. 147 */ 148static int 149_hash(int size, char *key) 150{ 151 unsigned int h = 0x0; 152 153 while (*key){ 154 h = (h << 1) ^ (h ^ (unsigned char) *key++); 155 } 156 157 h %= size; 158 return h; 159} 160 161/* 162 * hash_destroy() 163 * 164 * Find the key and (if it's there) remove it entirely. 165 * The function (*nukefunc)() is in charge of disposing 166 * of the storage help by the data associated with the node. 167 */ 168void 169hash_destroy(hash_table *table, char *key, void (*nukefunc)()) 170{ 171 int bucket = _hash(table->size, key); 172 hash_node *found = table->buckets[bucket]; 173 hash_node *to_free = NODE_NULL; 174 175 if (!found) { 176 return; 177 } 178 179 if (!strcmp(found->key, key)) { 180 /* 181 * It was the head of the list. 182 */ 183 table->buckets[bucket] = found->next; 184 to_free = found; 185 } 186 else { 187 /* 188 * Walk the list, looking one ahead. 189 */ 190 while (found->next) { 191 if (!strcmp(found->next->key, key)) { 192 to_free = found->next; 193 found->next = found->next->next; 194 break; 195 } 196 found = found->next; 197 } 198 199 if (!to_free){ 200 return; 201 } 202 } 203 204 if (nukefunc) 205 (*nukefunc)(to_free->key, to_free->data); 206 free(to_free); 207 return; 208} 209 210 211/* 212 * hash_search() 213 * 214 * Search the table for the given key. Then: 215 * 216 * 1) If you find it and there is no replacement function, just 217 * return what you found. (This is a simple search). 218 * 2) If you find it and there is a replacement function, run 219 * the function on the data you found, and replace the old 220 * data with whatever is passed in datum. Return 0. 221 * 3) If you don't find it and there is some datum, insert a 222 * new item into the table. Insertions go at the front of 223 * the bucket. Return 0. 224 * 4) Otherwise just return 0. 225 * 226 */ 227void * 228hash_search(hash_table *table, caddr_t key, void *datum, 229 void (*replace_func)()) 230{ 231 int bucket = _hash(table->size, key); 232 hash_node *found = list_find(key, table->buckets[bucket]); 233 234 if (found){ 235 if (!replace_func){ 236 return found->data; 237 } 238 else{ 239 (*replace_func)(found->data); 240 found->data = datum; 241 } 242 } 243 else{ 244 if (datum){ 245 246 hash_node *new = (hash_node *)malloc(sizeof(hash_node)); 247 248 if (!new || !assign_key(key, new)){ 249 return GENERIC_NULL; 250 } 251 new->data = datum; 252 new->next = table->buckets[bucket]; 253 table->buckets[bucket] = new; 254 return new; 255 } 256 } 257 return GENERIC_NULL; 258} 259 260 261/* 262 * assign_key() 263 * 264 * Set the key value of a node to be 'key'. Get some space from 265 * malloc and copy it in etc. Return 1 if all is well, 0 otherwise. 266 */ 267static int 268assign_key(char *key, hash_node *node) 269{ 270 if (!node || !key){ 271 return 0; 272 } 273 274 if (!(node->key = (char *)malloc(strlen(key) + 1))){ 275 return 0; 276 } 277 278 node->key[0] = '\0'; 279 strcat(node->key, key); 280 return 1; 281} 282 283/* 284 * hash_traverse() 285 * 286 * Traverse the hash table and run the function func on the 287 * data found at each node and the argument we're passed for it. 288 */ 289void 290hash_traverse(hash_table *table, int (*func)(), void *arg) 291{ 292 int i; 293 int size = table->size; 294 295 if (!func) 296 return; 297 298 for (i = 0; i < size; i++) { 299 hash_node *n = table->buckets[i]; 300 while (n) { 301 if ((*func)(n->key, n->data, arg) == 0) 302 return; 303 n = n->next; 304 } 305 } 306 return; 307} 308 309/* 310 * hash_purge() 311 * 312 * Run through the entire hash table. Call purge_func 313 * on the data found at each node, and then free the node. 314 * Set all the bucket pointers to 0. 315 */ 316void 317hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2)) 318{ 319 int i; 320 int size = table->size; 321 322 for (i = 0; i < size; i++) { 323 hash_node *n = table->buckets[i]; 324 if (n) { 325 do { 326 hash_node *to_free = n; 327 if (purge_func) { 328 (*purge_func)(n->key, n->data); 329 } 330 n = n->next; 331 free(to_free); 332 } while (n); 333 table->buckets[i] = NODE_NULL; 334 } 335 } 336} 337 338#undef min 339#define min(a, b) (a) < (b) ? (a) : (b) 340 341/* 342 * hash_stats() 343 * 344 * Print statistics about the current table allocation to stdout. 345 */ 346void 347hash_stats(hash_table *table, int verbose) 348{ 349 int i; 350 int total_elements = 0; 351 int non_empty_buckets = 0; 352 int max_count = 0; 353 int max_repeats = 0; 354 int *counts; 355 int size = table->size; 356 357 if (!(counts = (int *)malloc(size * sizeof(int)))){ 358 fprintf(stderr, "malloc returns 0\n"); 359 exit(1); 360 } 361 362 for (i = 0; i < size; i++){ 363 int x = 0; 364 hash_node *n = table->buckets[i]; 365 counts[i] = 0; 366 while (n){ 367 if (!x){ 368 x = 1; 369 non_empty_buckets++; 370 if (verbose){ 371 printf("bucket %2d: ", i); 372 } 373 } 374 if (verbose){ 375 printf(" %s", n->key); 376 } 377 counts[i]++; 378 n = n->next; 379 } 380 381 total_elements += counts[i]; 382 if (counts[i] > max_count){ 383 max_count = counts[i]; 384 max_repeats = 1; 385 } 386 else if (counts[i] == max_count){ 387 max_repeats++; 388 } 389 390 if (counts[i] && verbose){ 391 printf(" (%d)\n", counts[i]); 392 } 393 } 394 395 printf("\n"); 396 printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s"); 397 398 if (total_elements){ 399 printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size, 400 (double)100 * (double)non_empty_buckets / (double)(size)); 401 printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats); 402 printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets); 403 printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements))); 404 } 405 return; 406} 407