1/*
2 *
3 *                      Copyright 1990
4 *               Terry Jones & Jordan Hubbard
5 *
6 *		  PCS Computer Systeme, GmbH.
7 *	             Munich, West Germany
8 *
9 *
10 *  All rights reserved.
11 *
12 *  This is unsupported software and is subject to change without notice.
13 *  the author makes no representations about the suitability of this software
14 *  for any purpose. It is supplied "as is" without express or implied
15 *  warranty.
16 *
17 *  Permission to use, copy, modify, and distribute this software and its
18 *  documentation for any purpose and without fee is hereby granted, provided
19 *  that the above copyright notice appear in all copies and that both that
20 *  copyright notice and this permission notice appear in supporting
21 *  documentation, and that the name of the author not be used in
22 *  advertising or publicity pertaining to distribution of the software
23 *  without specific, written prior permission.
24 *
25 */
26
27/*
28 * This is a fairly simple open addressing hash scheme.
29 * Terry did all the code, I just did the spec.
30 * Thanks again, you crazy Aussie..
31 *
32 */
33
34/*
35 * $Log: strhash.c,v $
36 * Revision 2.0  90/03/26  01:44:26  jkh
37 * pre-beta check-in
38 *
39 * Revision 1.8  90/03/09  19:22:35  jkh
40 * Fixed bogus comment.
41 *
42 * Revision 1.7  90/03/09  19:01:08  jkh
43 * Added comments, GPL.
44 *
45 * Revision 1.6  90/03/08  17:55:58  terry
46 * Rearranged hash_purge to be a tiny bit more efficient.
47 * Added verbose option to hash_stats.
48 *
49 * Revision 1.5  90/03/08  17:19:54  terry
50 * Added hash_purge. Added arg to hash_traverse. Changed all
51 * void * to Generic.
52 *
53 * Revision 1.4  90/03/08  12:02:35  terry
54 * Fixed problems with allocation that I screwed up last night.
55 * Changed bucket lists to be singly linked. Thanks to JKH, my hero.
56 *
57 * Revision 1.3  90/03/07  21:33:33  terry
58 * Cleaned up a few decls to keep gcc -Wall quiet.
59 *
60 * Revision 1.2  90/03/07  21:14:53  terry
61 * Comments. Added HASH_STATS define. Removed hash_find()
62 * and new_node().
63 *
64 * Revision 1.1  90/03/07  20:49:45  terry
65 * Initial revision
66 *
67 *
68 */
69
70#include <sys/cdefs.h>
71__FBSDID("$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.10 2002/03/22 21:53:10 obrien Exp $");
72
73#include <stdio.h>
74#include <stdlib.h>
75#include <string.h>
76#include <sys/types.h>
77#include <strhash.h>
78
79#define HASH_NULL    (hash_table *)0
80#define NODE_NULL    (hash_node *)0
81#define GENERIC_NULL (void *)0
82
83#define HASH_SZ 97
84
85
86static int _hash(int size, char *key);
87static hash_node *list_find(caddr_t key, hash_node *head);
88static int assign_key(char *key, hash_node *node);
89
90
91/*
92 * hash_create()
93 *
94 * Malloc room for a new hash table and then room for its
95 * bucket pointers. Then set all the buckets to
96 * point to 0. Return the address of the new table.
97 */
98hash_table *
99hash_create(int size)
100{
101    int i;
102    hash_table *new = (hash_table *)malloc(sizeof(hash_table));
103
104    if (!new || size < 0){
105	return HASH_NULL;
106    }
107
108    if (size == 0){
109	size = HASH_SZ;
110    }
111
112    if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){
113	return HASH_NULL;
114    }
115
116    for (i = 0; i < size; i++){
117	new->buckets[i] = NODE_NULL;
118    }
119    new->size = size;
120
121    return new;
122}
123
124
125/*
126 * list_find()
127 *
128 * Find the key in the linked list pointed to by head.
129 */
130static hash_node *
131list_find(caddr_t key, hash_node *head)
132{
133    while (head){
134	if (!strcmp(head->key, key)){
135	    return head;
136	}
137	head = head->next;
138    }
139    return NODE_NULL;
140}
141
142
143/*
144 * _hash()
145 *
146 * Compute the hash value for the given key.
147 */
148static int
149_hash(int size, char *key)
150{
151    unsigned int h = 0x0;
152
153    while (*key){
154	h = (h << 1) ^ (h ^ (unsigned char) *key++);
155    }
156
157    h %= size;
158    return h;
159}
160
161/*
162 * hash_destroy()
163 *
164 * Find the key and (if it's there) remove it entirely.
165 * The function (*nukefunc)() is in charge of disposing
166 * of the storage help by the data associated with the node.
167 */
168void
169hash_destroy(hash_table *table, char *key, void (*nukefunc)())
170{
171    int bucket = _hash(table->size, key);
172    hash_node *found = table->buckets[bucket];
173    hash_node *to_free = NODE_NULL;
174
175    if (!found) {
176	return;
177    }
178
179    if (!strcmp(found->key, key)) {
180	/*
181	 * It was the head of the list.
182	 */
183	table->buckets[bucket] = found->next;
184	to_free = found;
185    }
186    else {
187	/*
188	 * Walk the list, looking one ahead.
189	 */
190	while (found->next) {
191	    if (!strcmp(found->next->key, key)) {
192		to_free = found->next;
193		found->next = found->next->next;
194		break;
195	    }
196	    found = found->next;
197	}
198
199	if (!to_free){
200	    return;
201	}
202    }
203
204    if (nukefunc)
205	(*nukefunc)(to_free->key, to_free->data);
206    free(to_free);
207    return;
208}
209
210
211/*
212 * hash_search()
213 *
214 * Search the table for the given key. Then:
215 *
216 * 1) If you find it and there is no replacement function, just
217 *    return what you found. (This is a simple search).
218 * 2) If you find it and there is a replacement function, run
219 *    the function on the data you found, and replace the old
220 *    data with whatever is passed in datum. Return 0.
221 * 3) If you don't find it and there is some datum, insert a
222 *    new item into the table. Insertions go at the front of
223 *    the bucket. Return 0.
224 * 4) Otherwise just return 0.
225 *
226 */
227void *
228hash_search(hash_table *table, caddr_t key, void *datum,
229	    void (*replace_func)())
230{
231    int bucket = _hash(table->size, key);
232    hash_node *found = list_find(key, table->buckets[bucket]);
233
234    if (found){
235	if (!replace_func){
236	    return found->data;
237	}
238	else{
239	    (*replace_func)(found->data);
240	    found->data = datum;
241	}
242    }
243    else{
244	if (datum){
245
246	    hash_node *new = (hash_node *)malloc(sizeof(hash_node));
247
248	    if (!new || !assign_key(key, new)){
249		return GENERIC_NULL;
250	    }
251	    new->data = datum;
252	    new->next = table->buckets[bucket];
253	    table->buckets[bucket] = new;
254	    return new;
255	}
256    }
257    return GENERIC_NULL;
258}
259
260
261/*
262 * assign_key()
263 *
264 * Set the key value of a node to be 'key'. Get some space from
265 * malloc and copy it in etc. Return 1 if all is well, 0 otherwise.
266 */
267static int
268assign_key(char *key, hash_node *node)
269{
270    if (!node || !key){
271	return 0;
272    }
273
274    if (!(node->key = (char *)malloc(strlen(key) + 1))){
275	return 0;
276    }
277
278    node->key[0] = '\0';
279    strcat(node->key, key);
280    return 1;
281}
282
283/*
284 * hash_traverse()
285 *
286 * Traverse the hash table and run the function func on the
287 * data found at each node and the argument we're passed for it.
288 */
289void
290hash_traverse(hash_table *table, int (*func)(), void *arg)
291{
292    int i;
293    int size = table->size;
294
295    if (!func)
296	return;
297
298    for (i = 0; i < size; i++) {
299	hash_node *n = table->buckets[i];
300	while (n) {
301	    if ((*func)(n->key, n->data, arg) == 0)
302		return;
303	    n = n->next;
304	}
305    }
306    return;
307}
308
309/*
310 * hash_purge()
311 *
312 * Run through the entire hash table. Call purge_func
313 * on the data found at each node, and then free the node.
314 * Set all the bucket pointers to 0.
315 */
316void
317hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2))
318{
319    int i;
320    int size = table->size;
321
322    for (i = 0; i < size; i++) {
323	hash_node *n = table->buckets[i];
324	if (n) {
325	    do {
326		hash_node *to_free = n;
327		if (purge_func) {
328		    (*purge_func)(n->key, n->data);
329		}
330		n = n->next;
331		free(to_free);
332	    } while (n);
333	    table->buckets[i] = NODE_NULL;
334	}
335    }
336}
337
338#undef min
339#define min(a, b) (a) < (b) ? (a) : (b)
340
341/*
342 * hash_stats()
343 *
344 * Print statistics about the current table allocation to stdout.
345 */
346void
347hash_stats(hash_table *table, int verbose)
348{
349    int i;
350    int total_elements = 0;
351    int non_empty_buckets = 0;
352    int max_count = 0;
353    int max_repeats = 0;
354    int *counts;
355    int size = table->size;
356
357    if (!(counts = (int *)malloc(size * sizeof(int)))){
358	fprintf(stderr, "malloc returns 0\n");
359	exit(1);
360    }
361
362    for (i = 0; i < size; i++){
363	int x = 0;
364	hash_node *n = table->buckets[i];
365	counts[i] = 0;
366	while (n){
367	    if (!x){
368		x = 1;
369		non_empty_buckets++;
370		if (verbose){
371		    printf("bucket %2d: ", i);
372		}
373	    }
374	    if (verbose){
375		printf(" %s", n->key);
376	    }
377	    counts[i]++;
378	    n = n->next;
379	}
380
381	total_elements += counts[i];
382	if (counts[i] > max_count){
383	    max_count = counts[i];
384	    max_repeats = 1;
385	}
386	else if (counts[i] == max_count){
387	    max_repeats++;
388	}
389
390	if (counts[i] && verbose){
391	    printf(" (%d)\n", counts[i]);
392	}
393    }
394
395    printf("\n");
396    printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");
397
398    if (total_elements){
399	printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
400	       (double)100 * (double)non_empty_buckets / (double)(size));
401	printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
402	printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
403	printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
404    }
405    return;
406}
407