ohash.c revision 269162
1281168Spfg/* $OpenBSD: src/lib/libutil/ohash.c,v 1.1 2014/06/02 18:52:03 deraadt Exp $ */
212099Sjoerg
312099Sjoerg/* Copyright (c) 1999, 2004 Marc Espie <espie@openbsd.org>
491586Smarkm *
512099Sjoerg * Permission to use, copy, modify, and distribute this software for any
612099Sjoerg * purpose with or without fee is hereby granted, provided that the above
712099Sjoerg * copyright notice and this permission notice appear in all copies.
812099Sjoerg *
912099Sjoerg * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1012099Sjoerg * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1112099Sjoerg * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1212099Sjoerg * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1312099Sjoerg * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1412099Sjoerg * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1512099Sjoerg * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1612099Sjoerg */
1712099Sjoerg
1812099Sjoerg#include <sys/cdefs.h>
1912099Sjoerg__FBSDID("$FreeBSD: head/usr.bin/m4/lib/ohash.c 269162 2014-07-27 22:54:13Z bapt $");
2012099Sjoerg
2112099Sjoerg#include <stddef.h>
2212099Sjoerg#include <stdint.h>
2312099Sjoerg#include <stdlib.h>
2412099Sjoerg#include <string.h>
2512099Sjoerg#include <limits.h>
2612099Sjoerg#include "ohash.h"
2712099Sjoerg
2812099Sjoergstruct _ohash_record {
2912099Sjoerg	uint32_t	hv;
3012099Sjoerg	const char	*p;
3112099Sjoerg};
3212099Sjoerg
3312099Sjoerg#define DELETED		((const char *)h)
3412099Sjoerg#define NONE		(h->size)
3591586Smarkm
3691586Smarkm/* Don't bother changing the hash table if the change is small enough.  */
37281168Spfg#define MINSIZE		(1UL << 4)
3812099Sjoerg#define MINDELETED	4
39108470Sschweikh
4012099Sjoergstatic void ohash_resize(struct ohash *);
4112099Sjoerg
4212099Sjoerg
4312099Sjoerg/* This handles the common case of variable length keys, where the
4412099Sjoerg * key is stored at the end of the record.
4591586Smarkm */
4691586Smarkmvoid *
4712099Sjoergohash_create_entry(struct ohash_info *i, const char *start, const char **end)
4812099Sjoerg{
4912099Sjoerg	char *p;
5012099Sjoerg
5112099Sjoerg	if (!*end)
5212099Sjoerg		*end = start + strlen(start);
5312099Sjoerg	p = (i->alloc)(i->key_offset + (*end - start) + 1, i->data);
5412099Sjoerg	if (p) {
5512099Sjoerg		memcpy(p+i->key_offset, start, *end-start);
5612099Sjoerg		p[i->key_offset + (*end - start)] = '\0';
5712099Sjoerg	}
5812099Sjoerg	return (void *)p;
5912099Sjoerg}
6012099Sjoerg
6112099Sjoerg/* hash_delete only frees the hash structure. Use hash_first/hash_next
6212099Sjoerg * to free entries as well.  */
6312099Sjoergvoid
6412099Sjoergohash_delete(struct ohash *h)
6512099Sjoerg{
6612099Sjoerg	(h->info.free)(h->t, h->info.data);
6712099Sjoerg#ifndef NDEBUG
6812099Sjoerg	h->t = NULL;
6912099Sjoerg#endif
7012099Sjoerg}
7112099Sjoerg
7212099Sjoergstatic void
7312099Sjoergohash_resize(struct ohash *h)
7412099Sjoerg{
7512099Sjoerg	struct _ohash_record *n;
7612099Sjoerg	size_t ns;
7712099Sjoerg	unsigned int	j;
7812099Sjoerg	unsigned int	i, incr;
7912099Sjoerg
8012099Sjoerg	if (4 * h->deleted < h->total) {
8112099Sjoerg		if (h->size >= (UINT_MAX >> 1U))
8212099Sjoerg			ns = UINT_MAX;
8312099Sjoerg		else
84228992Suqs			ns = h->size << 1U;
8512099Sjoerg	} else if (3 * h->deleted > 2 * h->total)
8612099Sjoerg		ns = h->size >> 1U;
8712099Sjoerg	else
8891586Smarkm		ns = h->size;
8912099Sjoerg	if (ns < MINSIZE)
9012099Sjoerg		ns = MINSIZE;
9112099Sjoerg#ifdef STATS_HASH
9212099Sjoerg	STAT_HASH_EXPAND++;
9312099Sjoerg	STAT_HASH_SIZE += ns - h->size;
9412099Sjoerg#endif
9512099Sjoerg
9612099Sjoerg	n = (h->info.calloc)(ns, sizeof(struct _ohash_record), h->info.data);
9712099Sjoerg	if (!n)
9812099Sjoerg		return;
9912099Sjoerg
10012099Sjoerg	for (j = 0; j < h->size; j++) {
10112099Sjoerg		if (h->t[j].p != NULL && h->t[j].p != DELETED) {
10212099Sjoerg			i = h->t[j].hv % ns;
10312099Sjoerg			incr = ((h->t[j].hv % (ns - 2)) & ~1) + 1;
10412099Sjoerg			while (n[i].p != NULL) {
10512099Sjoerg				i += incr;
10612099Sjoerg				if (i >= ns)
10712099Sjoerg					i -= ns;
10812099Sjoerg			}
10912099Sjoerg			n[i].hv = h->t[j].hv;
11012099Sjoerg			n[i].p = h->t[j].p;
11112099Sjoerg		}
11212099Sjoerg	}
11312099Sjoerg	(h->info.free)(h->t, h->info.data);
11412099Sjoerg	h->t = n;
11512099Sjoerg	h->size = ns;
11612099Sjoerg	h->total -= h->deleted;
11712099Sjoerg	h->deleted = 0;
11812099Sjoerg}
11912099Sjoerg
120281168Spfgvoid *
12112099Sjoergohash_remove(struct ohash *h, unsigned int i)
12212099Sjoerg{
12312099Sjoerg	void		*result = (void *)h->t[i].p;
12412099Sjoerg
12512099Sjoerg	if (result == NULL || result == DELETED)
12612099Sjoerg		return NULL;
12712099Sjoerg
12812099Sjoerg#ifdef STATS_HASH
12912099Sjoerg	STAT_HASH_ENTRIES--;
13012099Sjoerg#endif
13112099Sjoerg	h->t[i].p = DELETED;
13212099Sjoerg	h->deleted++;
13312099Sjoerg	if (h->deleted >= MINDELETED && 4 * h->deleted > h->total)
13412099Sjoerg		ohash_resize(h);
13512099Sjoerg	return result;
13612099Sjoerg}
13712099Sjoerg
13812099Sjoergvoid *
13912099Sjoergohash_find(struct ohash *h, unsigned int i)
14012099Sjoerg{
14112099Sjoerg	if (h->t[i].p == DELETED)
14212099Sjoerg		return NULL;
14312099Sjoerg	else
14412099Sjoerg		return (void *)h->t[i].p;
14512099Sjoerg}
14612099Sjoerg
14712099Sjoergvoid *
14812099Sjoergohash_insert(struct ohash *h, unsigned int i, void *p)
14912099Sjoerg{
15012099Sjoerg#ifdef STATS_HASH
15112099Sjoerg	STAT_HASH_ENTRIES++;
15212099Sjoerg#endif
15312099Sjoerg	if (h->t[i].p == DELETED) {
15412099Sjoerg		h->deleted--;
15512099Sjoerg		h->t[i].p = p;
15612099Sjoerg	} else {
15712099Sjoerg		h->t[i].p = p;
15891586Smarkm		/* Arbitrary resize boundary.  Tweak if not efficient enough.  */
15912099Sjoerg		if (++h->total * 4 > h->size * 3)
16012099Sjoerg			ohash_resize(h);
16112099Sjoerg	}
16212099Sjoerg	return p;
16312099Sjoerg}
16412099Sjoerg
16512099Sjoergunsigned int
16612099Sjoergohash_entries(struct ohash *h)
16712099Sjoerg{
16812099Sjoerg	return h->total - h->deleted;
16912099Sjoerg}
17012099Sjoerg
17112099Sjoergvoid *
17212099Sjoergohash_first(struct ohash *h, unsigned int *pos)
17312099Sjoerg{
17412099Sjoerg	*pos = 0;
17512099Sjoerg	return ohash_next(h, pos);
17612099Sjoerg}
17712099Sjoerg
17812099Sjoergvoid *
17912099Sjoergohash_next(struct ohash *h, unsigned int *pos)
18012099Sjoerg{
18112099Sjoerg	for (; *pos < h->size; (*pos)++)
18212099Sjoerg		if (h->t[*pos].p != DELETED && h->t[*pos].p != NULL)
18312099Sjoerg			return (void *)h->t[(*pos)++].p;
18412099Sjoerg	return NULL;
18512099Sjoerg}
18612099Sjoerg
18712099Sjoergvoid
18891586Smarkmohash_init(struct ohash *h, unsigned int size, struct ohash_info *info)
18912099Sjoerg{
19091586Smarkm	h->size = 1UL << size;
19191586Smarkm	if (h->size < MINSIZE)
19291586Smarkm		h->size = MINSIZE;
19391586Smarkm#ifdef STATS_HASH
19412099Sjoerg	STAT_HASH_CREATION++;
19512099Sjoerg	STAT_HASH_SIZE += h->size;
19612099Sjoerg#endif
19712099Sjoerg	/* Copy info so that caller may free it.  */
19812099Sjoerg	h->info.key_offset = info->key_offset;
19912099Sjoerg	h->info.calloc = info->calloc;
20012099Sjoerg	h->info.free = info->free;
20191586Smarkm	h->info.alloc = info->alloc;
20291586Smarkm	h->info.data = info->data;
20391586Smarkm	h->t = (h->info.calloc)(h->size, sizeof(struct _ohash_record),
20491586Smarkm		    h->info.data);
20591586Smarkm	h->total = h->deleted = 0;
20691586Smarkm}
20712099Sjoerg
20812099Sjoerguint32_t
20912099Sjoergohash_interval(const char *s, const char **e)
21012099Sjoerg{
211108470Sschweikh	uint32_t k;
21212099Sjoerg
21312099Sjoerg	if (!*e)
21412099Sjoerg		*e = s + strlen(s);
21512099Sjoerg	if (s == *e)
21612099Sjoerg		k = 0;
21712099Sjoerg	else
21891586Smarkm		k = *s++;
21912099Sjoerg	while (s != *e)
22091586Smarkm		k =  ((k << 2) | (k >> 30)) ^ *s++;
22112099Sjoerg	return k;
22212099Sjoerg}
22312099Sjoerg
22412099Sjoergunsigned int
22512099Sjoergohash_lookup_interval(struct ohash *h, const char *start, const char *end,
22612099Sjoerg    uint32_t hv)
22712099Sjoerg{
22812099Sjoerg	unsigned int	i, incr;
22912099Sjoerg	unsigned int	empty;
23012099Sjoerg
23112099Sjoerg#ifdef STATS_HASH
23212099Sjoerg	STAT_HASH_LOOKUP++;
23312099Sjoerg#endif
23412099Sjoerg	empty = NONE;
23512099Sjoerg	i = hv % h->size;
23612099Sjoerg	incr = ((hv % (h->size-2)) & ~1) + 1;
23712099Sjoerg	while (h->t[i].p != NULL) {
23812099Sjoerg#ifdef STATS_HASH
23912099Sjoerg		STAT_HASH_LENGTH++;
24012099Sjoerg#endif
24112099Sjoerg		if (h->t[i].p == DELETED) {
24212099Sjoerg			if (empty == NONE)
24312099Sjoerg				empty = i;
24412099Sjoerg		} else if (h->t[i].hv == hv &&
24512099Sjoerg		    strncmp(h->t[i].p+h->info.key_offset, start,
24612099Sjoerg			end - start) == 0 &&
24712099Sjoerg		    (h->t[i].p+h->info.key_offset)[end-start] == '\0') {
24812099Sjoerg			if (empty != NONE) {
24912099Sjoerg				h->t[empty].hv = hv;
25012099Sjoerg				h->t[empty].p = h->t[i].p;
25112099Sjoerg				h->t[i].p = DELETED;
25212099Sjoerg				return empty;
25312099Sjoerg			} else {
25412099Sjoerg#ifdef STATS_HASH
25512099Sjoerg				STAT_HASH_POSITIVE++;
25612099Sjoerg#endif
25712099Sjoerg				return i;
25812099Sjoerg			}
25912099Sjoerg		}
260281168Spfg		i += incr;
26112099Sjoerg		if (i >= h->size)
26212099Sjoerg			i -= h->size;
26312099Sjoerg	}
26412099Sjoerg
26512099Sjoerg	/* Found an empty position.  */
26612099Sjoerg	if (empty != NONE)
26712099Sjoerg		i = empty;
26812099Sjoerg	h->t[i].hv = hv;
26912099Sjoerg	return i;
27012099Sjoerg}
27112099Sjoerg
27212099Sjoergunsigned int
27312099Sjoergohash_lookup_memory(struct ohash *h, const char *k, size_t size, uint32_t hv)
27412099Sjoerg{
27512099Sjoerg	unsigned int	i, incr;
27691586Smarkm	unsigned int	empty;
27791586Smarkm
27891586Smarkm#ifdef STATS_HASH
27991586Smarkm	STAT_HASH_LOOKUP++;
28091586Smarkm#endif
28191586Smarkm	empty = NONE;
28212099Sjoerg	i = hv % h->size;
28312099Sjoerg	incr = ((hv % (h->size-2)) & ~1) + 1;
28412099Sjoerg	while (h->t[i].p != NULL) {
28512099Sjoerg#ifdef STATS_HASH
28612099Sjoerg		STAT_HASH_LENGTH++;
28712099Sjoerg#endif
28812099Sjoerg		if (h->t[i].p == DELETED) {
28912099Sjoerg			if (empty == NONE)
29012099Sjoerg				empty = i;
29112099Sjoerg		} else if (h->t[i].hv == hv &&
29212099Sjoerg		    memcmp(h->t[i].p+h->info.key_offset, k, size) == 0) {
29391586Smarkm			if (empty != NONE) {
29412099Sjoerg				h->t[empty].hv = hv;
29512099Sjoerg				h->t[empty].p = h->t[i].p;
29612099Sjoerg				h->t[i].p = DELETED;
29712099Sjoerg				return empty;
29812099Sjoerg			} else {
29912099Sjoerg#ifdef STATS_HASH
30012099Sjoerg				STAT_HASH_POSITIVE++;
30112099Sjoerg#endif
30212099Sjoerg			}	return i;
30312099Sjoerg		}
30412099Sjoerg		i += incr;
30512099Sjoerg		if (i >= h->size)
30612099Sjoerg			i -= h->size;
30712099Sjoerg	}
30812099Sjoerg
30912099Sjoerg	/* Found an empty position.  */
31012099Sjoerg	if (empty != NONE)
31112099Sjoerg		i = empty;
31212099Sjoerg	h->t[i].hv = hv;
31312099Sjoerg	return i;
31412099Sjoerg}
31512099Sjoerg
31612099Sjoergunsigned int
31712099Sjoergohash_qlookup(struct ohash *h, const char *s)
31812099Sjoerg{
31912099Sjoerg	const char *e = NULL;
32012099Sjoerg	return ohash_qlookupi(h, s, &e);
32112099Sjoerg}
32212099Sjoerg
32312099Sjoergunsigned int
32412099Sjoergohash_qlookupi(struct ohash *h, const char *s, const char **e)
32512099Sjoerg{
32612099Sjoerg	uint32_t hv;
32712099Sjoerg
32812099Sjoerg	hv = ohash_interval(s, e);
32912099Sjoerg	return ohash_lookup_interval(h, s, *e, hv);
33012099Sjoerg}
33112099Sjoerg