hash.c revision 1.1
1/*	$NetBSD: hash.c,v 1.1 2018/08/12 12:08:24 christos Exp $	*/
2
3/*
4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5 *
6 * This Source Code Form is subject to the terms of the Mozilla Public
7 * License, v. 2.0. If a copy of the MPL was not distributed with this
8 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 *
10 * See the COPYRIGHT file distributed with this work for additional
11 * information regarding copyright ownership.
12 */
13
14/*! \file
15 * Some portion of this code was derived from universal hash function
16 * libraries of Rice University.
17\section license UH Universal Hashing Library
18
19Copyright ((c)) 2002, Rice University
20All rights reserved.
21
22Redistribution and use in source and binary forms, with or without
23modification, are permitted provided that the following conditions are
24met:
25
26    * Redistributions of source code must retain the above copyright
27    notice, this list of conditions and the following disclaimer.
28
29    * Redistributions in binary form must reproduce the above
30    copyright notice, this list of conditions and the following
31    disclaimer in the documentation and/or other materials provided
32    with the distribution.
33
34    * Neither the name of Rice University (RICE) nor the names of its
35    contributors may be used to endorse or promote products derived
36    from this software without specific prior written permission.
37
38
39This software is provided by RICE and the contributors on an "as is"
40basis, without any representations or warranties of any kind, express
41or implied including, but not limited to, representations or
42warranties of non-infringement, merchantability or fitness for a
43particular purpose. In no event shall RICE or contributors be liable
44for any direct, indirect, incidental, special, exemplary, or
45consequential damages (including, but not limited to, procurement of
46substitute goods or services; loss of use, data, or profits; or
47business interruption) however caused and on any theory of liability,
48whether in contract, strict liability, or tort (including negligence
49or otherwise) arising in any way out of the use of this software, even
50if advised of the possibility of such damage.
51*/
52
53#include <config.h>
54
55#include <isc/entropy.h>
56#include <isc/hash.h>
57#include <isc/mem.h>
58#include <isc/magic.h>
59#include <isc/mutex.h>
60#include <isc/once.h>
61#include <isc/random.h>
62#include <isc/refcount.h>
63#include <isc/string.h>
64#include <isc/util.h>
65
66#define HASH_MAGIC		ISC_MAGIC('H', 'a', 's', 'h')
67#define VALID_HASH(h)		ISC_MAGIC_VALID((h), HASH_MAGIC)
68
69/*%
70 * A large 32-bit prime number that specifies the range of the hash output.
71 */
72#define PRIME32 0xFFFFFFFB              /* 2^32 -  5 */
73
74/*@{*/
75/*%
76 * Types of random seed and hash accumulator.  Perhaps they can be system
77 * dependent.
78 */
79typedef isc_uint32_t hash_accum_t;
80typedef isc_uint16_t hash_random_t;
81/*@}*/
82
83/*% isc hash structure */
84struct isc_hash {
85	unsigned int	magic;
86	isc_mem_t	*mctx;
87	isc_mutex_t	lock;
88	isc_boolean_t	initialized;
89	isc_refcount_t	refcnt;
90	isc_entropy_t	*entropy; /*%< entropy source */
91	size_t		limit;	/*%< upper limit of key length */
92	size_t		vectorlen; /*%< size of the vector below */
93	hash_random_t	*rndvector; /*%< random vector for universal hashing */
94};
95
96static isc_mutex_t createlock;
97static isc_once_t once = ISC_ONCE_INIT;
98
99LIBISC_EXTERNAL_DATA isc_hash_t *isc_hashctx = NULL;
100
101static unsigned char maptolower[] = {
102	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
103	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
104	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
105	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
106	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
107	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
108	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
109	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
110	0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
111	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
112	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
113	0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
114	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
115	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
116	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
117	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
118	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
119	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
120	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
121	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
122	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
123	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
124	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
125	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
126	0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
127	0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
128	0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
129	0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
130	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
131	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
132	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
133	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
134};
135
136isc_result_t
137isc_hash_ctxcreate(isc_mem_t *mctx, isc_entropy_t *entropy,
138		   size_t limit, isc_hash_t **hctxp)
139{
140	isc_result_t result;
141	isc_hash_t *hctx;
142	size_t vlen;
143	hash_random_t *rv;
144	hash_accum_t overflow_limit;
145
146	REQUIRE(mctx != NULL);
147	REQUIRE(hctxp != NULL && *hctxp == NULL);
148
149	/*
150	 * Overflow check.  Since our implementation only does a modulo
151	 * operation at the last stage of hash calculation, the accumulator
152	 * must not overflow.
153	 */
154	overflow_limit =
155		1 << (((sizeof(hash_accum_t) - sizeof(hash_random_t))) * 8);
156	if (overflow_limit < (limit + 1) * 0xff)
157		return (ISC_R_RANGE);
158
159	hctx = isc_mem_get(mctx, sizeof(isc_hash_t));
160	if (hctx == NULL)
161		return (ISC_R_NOMEMORY);
162
163	vlen = sizeof(hash_random_t) * (limit + 1);
164	rv = isc_mem_get(mctx, vlen);
165	if (rv == NULL) {
166		result = ISC_R_NOMEMORY;
167		goto errout;
168	}
169
170	/*
171	 * We need a lock.
172	 */
173	result = isc_mutex_init(&hctx->lock);
174	if (result != ISC_R_SUCCESS)
175		goto errout;
176
177	/*
178	 * From here down, no failures will/can occur.
179	 */
180	hctx->magic = HASH_MAGIC;
181	hctx->mctx = NULL;
182	isc_mem_attach(mctx, &hctx->mctx);
183	hctx->initialized = ISC_FALSE;
184	result = isc_refcount_init(&hctx->refcnt, 1);
185	if (result != ISC_R_SUCCESS)
186		goto cleanup_lock;
187	hctx->entropy = NULL;
188	hctx->limit = limit;
189	hctx->vectorlen = vlen;
190	hctx->rndvector = rv;
191
192	if (entropy != NULL)
193		isc_entropy_attach(entropy, &hctx->entropy);
194
195	*hctxp = hctx;
196	return (ISC_R_SUCCESS);
197
198 cleanup_lock:
199	DESTROYLOCK(&hctx->lock);
200 errout:
201	isc_mem_put(mctx, hctx, sizeof(isc_hash_t));
202	if (rv != NULL)
203		isc_mem_put(mctx, rv, vlen);
204
205	return (result);
206}
207
208static void
209initialize_lock(void) {
210	RUNTIME_CHECK(isc_mutex_init(&createlock) == ISC_R_SUCCESS);
211}
212
213isc_result_t
214isc_hash_create(isc_mem_t *mctx, isc_entropy_t *entropy, size_t limit) {
215	isc_result_t result = ISC_R_SUCCESS;
216
217	REQUIRE(mctx != NULL);
218	INSIST(isc_hashctx == NULL);
219
220	RUNTIME_CHECK(isc_once_do(&once, initialize_lock) == ISC_R_SUCCESS);
221
222	LOCK(&createlock);
223
224	if (isc_hashctx == NULL)
225		result = isc_hash_ctxcreate(mctx, entropy, limit,
226					    &isc_hashctx);
227
228	UNLOCK(&createlock);
229
230	return (result);
231}
232
233void
234isc_hash_ctxinit(isc_hash_t *hctx) {
235	LOCK(&hctx->lock);
236
237	if (hctx->initialized == ISC_TRUE)
238		goto out;
239
240	if (hctx->entropy != NULL) {
241		isc_result_t result;
242
243		result = isc_entropy_getdata(hctx->entropy,
244					     hctx->rndvector,
245					     (unsigned int)hctx->vectorlen,
246					     NULL, 0);
247		INSIST(result == ISC_R_SUCCESS);
248	} else {
249		isc_uint32_t pr;
250		size_t i, copylen;
251		unsigned char *p;
252
253		p = (unsigned char *)hctx->rndvector;
254		for (i = 0; i < hctx->vectorlen; i += copylen, p += copylen) {
255			isc_random_get(&pr);
256			if (i + sizeof(pr) <= hctx->vectorlen)
257				copylen = sizeof(pr);
258			else
259				copylen = hctx->vectorlen - i;
260
261			memmove(p, &pr, copylen);
262		}
263		INSIST(p == (unsigned char *)hctx->rndvector +
264		       hctx->vectorlen);
265	}
266
267	hctx->initialized = ISC_TRUE;
268
269 out:
270	UNLOCK(&hctx->lock);
271}
272
273void
274isc_hash_init(void) {
275	INSIST(isc_hashctx != NULL && VALID_HASH(isc_hashctx));
276
277	isc_hash_ctxinit(isc_hashctx);
278}
279
280void
281isc_hash_ctxattach(isc_hash_t *hctx, isc_hash_t **hctxp) {
282	REQUIRE(VALID_HASH(hctx));
283	REQUIRE(hctxp != NULL && *hctxp == NULL);
284
285	isc_refcount_increment(&hctx->refcnt, NULL);
286	*hctxp = hctx;
287}
288
289static void
290destroy(isc_hash_t **hctxp) {
291	isc_hash_t *hctx;
292	isc_mem_t *mctx;
293
294	REQUIRE(hctxp != NULL && *hctxp != NULL);
295	hctx = *hctxp;
296	*hctxp = NULL;
297
298	LOCK(&hctx->lock);
299
300	isc_refcount_destroy(&hctx->refcnt);
301
302	mctx = hctx->mctx;
303	if (hctx->entropy != NULL)
304		isc_entropy_detach(&hctx->entropy);
305	if (hctx->rndvector != NULL)
306		isc_mem_put(mctx, hctx->rndvector, hctx->vectorlen);
307
308	UNLOCK(&hctx->lock);
309
310	DESTROYLOCK(&hctx->lock);
311
312	memset(hctx, 0, sizeof(isc_hash_t));
313	isc_mem_put(mctx, hctx, sizeof(isc_hash_t));
314	isc_mem_detach(&mctx);
315}
316
317void
318isc_hash_ctxdetach(isc_hash_t **hctxp) {
319	isc_hash_t *hctx;
320	unsigned int refs;
321
322	REQUIRE(hctxp != NULL && VALID_HASH(*hctxp));
323	hctx = *hctxp;
324
325	isc_refcount_decrement(&hctx->refcnt, &refs);
326	if (refs == 0)
327		destroy(&hctx);
328
329	*hctxp = NULL;
330}
331
332void
333isc_hash_destroy(void) {
334	unsigned int refs;
335
336	INSIST(isc_hashctx != NULL && VALID_HASH(isc_hashctx));
337
338	isc_refcount_decrement(&isc_hashctx->refcnt, &refs);
339	INSIST(refs == 0);
340
341	destroy(&isc_hashctx);
342}
343
344static inline unsigned int
345hash_calc(isc_hash_t *hctx, const unsigned char *key, unsigned int keylen,
346	  isc_boolean_t case_sensitive)
347{
348	hash_accum_t partial_sum = 0;
349	hash_random_t *p = hctx->rndvector;
350	unsigned int i = 0;
351
352	/* Make it sure that the hash context is initialized. */
353	if (hctx->initialized == ISC_FALSE)
354		isc_hash_ctxinit(hctx);
355
356	if (case_sensitive) {
357		for (i = 0; i < keylen; i++)
358			partial_sum += key[i] * (hash_accum_t)p[i];
359	} else {
360		for (i = 0; i < keylen; i++)
361			partial_sum += maptolower[key[i]] * (hash_accum_t)p[i];
362	}
363
364	partial_sum += p[i];
365
366	return ((unsigned int)(partial_sum % PRIME32));
367}
368
369unsigned int
370isc_hash_ctxcalc(isc_hash_t *hctx, const unsigned char *key,
371		 unsigned int keylen, isc_boolean_t case_sensitive)
372{
373	REQUIRE(hctx != NULL && VALID_HASH(hctx));
374	REQUIRE(keylen <= hctx->limit);
375
376	return (hash_calc(hctx, key, keylen, case_sensitive));
377}
378
379unsigned int
380isc_hash_calc(const unsigned char *key, unsigned int keylen,
381	      isc_boolean_t case_sensitive)
382{
383	INSIST(isc_hashctx != NULL && VALID_HASH(isc_hashctx));
384	REQUIRE(keylen <= isc_hashctx->limit);
385
386	return (hash_calc(isc_hashctx, key, keylen, case_sensitive));
387}
388
389void
390isc__hash_setvec(const isc_uint16_t *vec) {
391	int i;
392	hash_random_t *p;
393
394	if (isc_hashctx == NULL)
395		return;
396
397	p = isc_hashctx->rndvector;
398	for (i = 0; i < 256; i++) {
399		p[i] = vec[i];
400	}
401}
402
403static isc_uint32_t fnv_offset_basis;
404static isc_once_t fnv_once = ISC_ONCE_INIT;
405static isc_boolean_t fnv_initialized = ISC_FALSE;
406
407static void
408fnv_initialize(void) {
409	/*
410	 * This function should not leave fnv_offset_basis set to
411	 * 0. Also, after this function has been called, if it is called
412	 * again, it should not change fnv_offset_basis.
413	 */
414	while (fnv_offset_basis == 0) {
415		isc_random_get(&fnv_offset_basis);
416	}
417
418	fnv_initialized = ISC_TRUE;
419}
420
421const void *
422isc_hash_get_initializer(void) {
423	if (ISC_UNLIKELY(!fnv_initialized))
424		RUNTIME_CHECK(isc_once_do(&fnv_once, fnv_initialize) == ISC_R_SUCCESS);
425
426	return (&fnv_offset_basis);
427}
428
429void
430isc_hash_set_initializer(const void *initializer) {
431	REQUIRE(initializer != NULL);
432
433	/*
434	 * Ensure that fnv_initialize() is not called after
435	 * isc_hash_set_initializer() is called.
436	 */
437	if (ISC_UNLIKELY(!fnv_initialized))
438		RUNTIME_CHECK(isc_once_do(&fnv_once, fnv_initialize) == ISC_R_SUCCESS);
439
440	fnv_offset_basis = *((const unsigned int *) initializer);
441}
442
443isc_uint32_t
444isc_hash_function(const void *data, size_t length,
445		  isc_boolean_t case_sensitive,
446		  const isc_uint32_t *previous_hashp)
447{
448	isc_uint32_t hval;
449	const unsigned char *bp;
450	const unsigned char *be;
451
452	REQUIRE(length == 0 || data != NULL);
453
454	if (ISC_UNLIKELY(!fnv_initialized))
455		RUNTIME_CHECK(isc_once_do(&fnv_once, fnv_initialize) == ISC_R_SUCCESS);
456
457	hval = ISC_UNLIKELY(previous_hashp != NULL) ?
458		*previous_hashp : fnv_offset_basis;
459
460	if (length == 0)
461		return (hval);
462
463	bp = (const unsigned char *) data;
464	be = bp + length;
465
466	/*
467	 * Fowler-Noll-Vo FNV-1a hash function.
468	 *
469	 * NOTE: A random FNV offset basis is used by default to avoid
470	 * collision attacks as the hash function is reversible. This
471	 * makes the mapping non-deterministic, but the distribution in
472	 * the domain is still uniform.
473	 */
474
475	if (case_sensitive) {
476		while (bp <= be - 4) {
477			hval ^= bp[0];
478			hval *= 16777619;
479			hval ^= bp[1];
480			hval *= 16777619;
481			hval ^= bp[2];
482			hval *= 16777619;
483			hval ^= bp[3];
484			hval *= 16777619;
485			bp += 4;
486		}
487		while (bp < be) {
488			hval ^= *bp++;
489			hval *= 16777619;
490		}
491	} else {
492		while (bp <= be - 4) {
493			hval ^= maptolower[bp[0]];
494			hval *= 16777619;
495			hval ^= maptolower[bp[1]];
496			hval *= 16777619;
497			hval ^= maptolower[bp[2]];
498			hval *= 16777619;
499			hval ^= maptolower[bp[3]];
500			hval *= 16777619;
501			bp += 4;
502		}
503		while (bp < be) {
504			hval ^= maptolower[*bp++];
505			hval *= 16777619;
506		}
507	}
508
509	return (hval);
510}
511
512isc_uint32_t
513isc_hash_function_reverse(const void *data, size_t length,
514			  isc_boolean_t case_sensitive,
515			  const isc_uint32_t *previous_hashp)
516{
517	isc_uint32_t hval;
518	const unsigned char *bp;
519	const unsigned char *be;
520
521	REQUIRE(length == 0 || data != NULL);
522
523	if (ISC_UNLIKELY(!fnv_initialized))
524		RUNTIME_CHECK(isc_once_do(&fnv_once, fnv_initialize) == ISC_R_SUCCESS);
525
526	hval = ISC_UNLIKELY(previous_hashp != NULL) ?
527		*previous_hashp : fnv_offset_basis;
528
529	if (length == 0)
530		return (hval);
531
532	bp = (const unsigned char *) data;
533	be = bp + length;
534
535	/*
536	 * Fowler-Noll-Vo FNV-1a hash function.
537	 *
538	 * NOTE: A random FNV offset basis is used by default to avoid
539	 * collision attacks as the hash function is reversible. This
540	 * makes the mapping non-deterministic, but the distribution in
541	 * the domain is still uniform.
542	 */
543
544	if (case_sensitive) {
545		while (be >= bp + 4) {
546			be -= 4;
547			hval ^= be[3];
548			hval *= 16777619;
549			hval ^= be[2];
550			hval *= 16777619;
551			hval ^= be[1];
552			hval *= 16777619;
553			hval ^= be[0];
554			hval *= 16777619;
555		}
556		while (--be >= bp) {
557			hval ^= *be;
558			hval *= 16777619;
559		}
560	} else {
561		while (be >= bp + 4) {
562			be -= 4;
563			hval ^= maptolower[be[3]];
564			hval *= 16777619;
565			hval ^= maptolower[be[2]];
566			hval *= 16777619;
567			hval ^= maptolower[be[1]];
568			hval *= 16777619;
569			hval ^= maptolower[be[0]];
570			hval *= 16777619;
571		}
572		while (--be >= bp) {
573			hval ^= maptolower[*be];
574			hval *= 16777619;
575		}
576	}
577
578	return (hval);
579}
580