1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996,2008 Oracle.  All rights reserved.
5 */
6/*
7 * Copyright (c) 1990, 1993
8 *	Margo Seltzer.  All rights reserved.
9 */
10/*
11 * Copyright (c) 1990, 1993
12 *	The Regents of the University of California.  All rights reserved.
13 *
14 * This code is derived from software contributed to Berkeley by
15 * Margo Seltzer.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 *    notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 *    notice, this list of conditions and the following disclaimer in the
24 *    documentation and/or other materials provided with the distribution.
25 * 3. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * $Id: hash_func.c,v 12.7 2008/01/08 20:58:34 bostic Exp $
42 */
43
44#include "db_config.h"
45
46#include "db_int.h"
47#include "dbinc/db_page.h"
48#include "dbinc/hash.h"
49
50/*
51 * __ham_func2 --
52 *	Phong Vo's linear congruential hash.
53 *
54 * PUBLIC: u_int32_t __ham_func2 __P((DB *, const void *, u_int32_t));
55 */
56#define	DCHARHASH(h, c)	((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c))
57
58u_int32_t
59__ham_func2(dbp, key, len)
60	DB *dbp;
61	const void *key;
62	u_int32_t len;
63{
64	const u_int8_t *e, *k;
65	u_int32_t h;
66	u_int8_t c;
67
68	if (dbp != NULL)
69		COMPQUIET(dbp, NULL);
70
71	k = key;
72	e = k + len;
73	for (h = 0; k != e;) {
74		c = *k++;
75		if (!c && k > e)
76			break;
77		DCHARHASH(h, c);
78	}
79	return (h);
80}
81
82/*
83 * __ham_func3 --
84 *	Ozan Yigit's original sdbm hash.
85 *
86 * Ugly, but fast.  Break the string up into 8 byte units.  On the first time
87 * through the loop get the "leftover bytes" (strlen % 8).  On every other
88 * iteration, perform 8 HASHC's so we handle all 8 bytes.  Essentially, this
89 * saves us 7 cmp & branch instructions.
90 *
91 * PUBLIC: u_int32_t __ham_func3 __P((DB *, const void *, u_int32_t));
92 */
93u_int32_t
94__ham_func3(dbp, key, len)
95	DB *dbp;
96	const void *key;
97	u_int32_t len;
98{
99	const u_int8_t *k;
100	u_int32_t n, loop;
101
102	if (dbp != NULL)
103		COMPQUIET(dbp, NULL);
104
105	if (len == 0)
106		return (0);
107
108#define	HASHC	n = *k++ + 65599 * n
109	n = 0;
110	k = key;
111
112	loop = (len + 8 - 1) >> 3;
113	switch (len & (8 - 1)) {
114	case 0:
115		do {
116			HASHC;
117	case 7:
118			HASHC;
119	case 6:
120			HASHC;
121	case 5:
122			HASHC;
123	case 4:
124			HASHC;
125	case 3:
126			HASHC;
127	case 2:
128			HASHC;
129	case 1:
130			HASHC;
131		} while (--loop);
132	}
133	return (n);
134}
135
136/*
137 * __ham_func4 --
138 *	Chris Torek's hash function.  Although this function performs only
139 *	slightly worse than __ham_func5 on strings, it performs horribly on
140 *	numbers.
141 *
142 * PUBLIC: u_int32_t __ham_func4 __P((DB *, const void *, u_int32_t));
143 */
144u_int32_t
145__ham_func4(dbp, key, len)
146	DB *dbp;
147	const void *key;
148	u_int32_t len;
149{
150	const u_int8_t *k;
151	u_int32_t h, loop;
152
153	if (dbp != NULL)
154		COMPQUIET(dbp, NULL);
155
156	if (len == 0)
157		return (0);
158
159#define	HASH4a	h = (h << 5) - h + *k++;
160#define	HASH4b	h = (h << 5) + h + *k++;
161#define	HASH4	HASH4b
162	h = 0;
163	k = key;
164
165	loop = (len + 8 - 1) >> 3;
166	switch (len & (8 - 1)) {
167	case 0:
168		do {
169			HASH4;
170	case 7:
171			HASH4;
172	case 6:
173			HASH4;
174	case 5:
175			HASH4;
176	case 4:
177			HASH4;
178	case 3:
179			HASH4;
180	case 2:
181			HASH4;
182	case 1:
183			HASH4;
184		} while (--loop);
185	}
186	return (h);
187}
188
189/*
190 * Fowler/Noll/Vo hash
191 *
192 * The basis of the hash algorithm was taken from an idea sent by email to the
193 * IEEE Posix P1003.2 mailing list from Phong Vo (kpv@research.att.com) and
194 * Glenn Fowler (gsf@research.att.com).  Landon Curt Noll (chongo@toad.com)
195 * later improved on their algorithm.
196 *
197 * The magic is in the interesting relationship between the special prime
198 * 16777619 (2^24 + 403) and 2^32 and 2^8.
199 *
200 * This hash produces the fewest collisions of any function that we've seen so
201 * far, and works well on both numbers and strings.
202 *
203 * PUBLIC: u_int32_t __ham_func5 __P((DB *, const void *, u_int32_t));
204 */
205u_int32_t
206__ham_func5(dbp, key, len)
207	DB *dbp;
208	const void *key;
209	u_int32_t len;
210{
211	const u_int8_t *k, *e;
212	u_int32_t h;
213
214	if (dbp != NULL)
215		COMPQUIET(dbp, NULL);
216
217	k = key;
218	e = k + len;
219	for (h = 0; k < e; ++k) {
220		h *= 16777619;
221		h ^= *k;
222	}
223	return (h);
224}
225
226/*
227 * __ham_test --
228 *
229 * PUBLIC: u_int32_t __ham_test __P((DB *, const void *, u_int32_t));
230 */
231u_int32_t
232__ham_test(dbp, key, len)
233	DB *dbp;
234	const void *key;
235	u_int32_t len;
236{
237	COMPQUIET(dbp, NULL);
238	COMPQUIET(len, 0);
239	return ((u_int32_t)*(char *)key);
240}
241