1/*
2 * $Id: linkhash.c,v 1.4 2006/01/26 02:16:28 mclark Exp $
3 *
4 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
5 * Michael Clark <michael@metaparadigm.com>
6 * Copyright (c) 2009 Hewlett-Packard Development Company, L.P.
7 *
8 * This library is free software; you can redistribute it and/or modify
9 * it under the terms of the MIT license. See COPYING for details.
10 *
11 */
12
13#include <stdio.h>
14#include <string.h>
15#include <stdlib.h>
16#include <stdarg.h>
17#include <stddef.h>
18#include <limits.h>
19
20#ifdef HAVE_ENDIAN_H
21# include <endian.h>    /* attempt to define endianness */
22#endif
23
24#include "random_seed.h"
25#include "linkhash.h"
26
27void lh_abort(const char *msg, ...)
28{
29	va_list ap;
30	va_start(ap, msg);
31	vprintf(msg, ap);
32	va_end(ap);
33	exit(1);
34}
35
36unsigned long lh_ptr_hash(const void *k)
37{
38	/* CAW: refactored to be 64bit nice */
39	return (unsigned long)((((ptrdiff_t)k * LH_PRIME) >> 4) & ULONG_MAX);
40}
41
42int lh_ptr_equal(const void *k1, const void *k2)
43{
44	return (k1 == k2);
45}
46
47/*
48 * hashlittle from lookup3.c, by Bob Jenkins, May 2006, Public Domain.
49 * http://burtleburtle.net/bob/c/lookup3.c
50 * minor modifications to make functions static so no symbols are exported
51 * minor mofifications to compile with -Werror
52 */
53
54/*
55-------------------------------------------------------------------------------
56lookup3.c, by Bob Jenkins, May 2006, Public Domain.
57
58These are functions for producing 32-bit hashes for hash table lookup.
59hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
60are externally useful functions.  Routines to test the hash are included
61if SELF_TEST is defined.  You can use this free for any purpose.  It's in
62the public domain.  It has no warranty.
63
64You probably want to use hashlittle().  hashlittle() and hashbig()
65hash byte arrays.  hashlittle() is is faster than hashbig() on
66little-endian machines.  Intel and AMD are little-endian machines.
67On second thought, you probably want hashlittle2(), which is identical to
68hashlittle() except it returns two 32-bit hashes for the price of one.
69You could implement hashbig2() if you wanted but I haven't bothered here.
70
71If you want to find a hash of, say, exactly 7 integers, do
72  a = i1;  b = i2;  c = i3;
73  mix(a,b,c);
74  a += i4; b += i5; c += i6;
75  mix(a,b,c);
76  a += i7;
77  final(a,b,c);
78then use c as the hash value.  If you have a variable length array of
794-byte integers to hash, use hashword().  If you have a byte array (like
80a character string), use hashlittle().  If you have several byte arrays, or
81a mix of things, see the comments above hashlittle().
82
83Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
84then mix those integers.  This is fast (you can do a lot more thorough
85mixing with 12*3 instructions on 3 integers than you can with 3 instructions
86on 1 byte), but shoehorning those bytes into integers efficiently is messy.
87-------------------------------------------------------------------------------
88*/
89
90/*
91 * My best guess at if you are big-endian or little-endian.  This may
92 * need adjustment.
93 */
94#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
95     __BYTE_ORDER == __LITTLE_ENDIAN) || \
96    (defined(i386) || defined(__i386__) || defined(__i486__) || \
97     defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
98# define HASH_LITTLE_ENDIAN 1
99# define HASH_BIG_ENDIAN 0
100#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
101       __BYTE_ORDER == __BIG_ENDIAN) || \
102      (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
103# define HASH_LITTLE_ENDIAN 0
104# define HASH_BIG_ENDIAN 1
105#else
106# define HASH_LITTLE_ENDIAN 0
107# define HASH_BIG_ENDIAN 0
108#endif
109
110#define hashsize(n) ((uint32_t)1<<(n))
111#define hashmask(n) (hashsize(n)-1)
112#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
113
114/*
115-------------------------------------------------------------------------------
116mix -- mix 3 32-bit values reversibly.
117
118This is reversible, so any information in (a,b,c) before mix() is
119still in (a,b,c) after mix().
120
121If four pairs of (a,b,c) inputs are run through mix(), or through
122mix() in reverse, there are at least 32 bits of the output that
123are sometimes the same for one pair and different for another pair.
124This was tested for:
125* pairs that differed by one bit, by two bits, in any combination
126  of top bits of (a,b,c), or in any combination of bottom bits of
127  (a,b,c).
128* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
129  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
130  is commonly produced by subtraction) look like a single 1-bit
131  difference.
132* the base values were pseudorandom, all zero but one bit set, or
133  all zero plus a counter that starts at zero.
134
135Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
136satisfy this are
137    4  6  8 16 19  4
138    9 15  3 18 27 15
139   14  9  3  7 17  3
140Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
141for "differ" defined as + with a one-bit base and a two-bit delta.  I
142used http://burtleburtle.net/bob/hash/avalanche.html to choose
143the operations, constants, and arrangements of the variables.
144
145This does not achieve avalanche.  There are input bits of (a,b,c)
146that fail to affect some output bits of (a,b,c), especially of a.  The
147most thoroughly mixed value is c, but it doesn't really even achieve
148avalanche in c.
149
150This allows some parallelism.  Read-after-writes are good at doubling
151the number of bits affected, so the goal of mixing pulls in the opposite
152direction as the goal of parallelism.  I did what I could.  Rotates
153seem to cost as much as shifts on every machine I could lay my hands
154on, and rotates are much kinder to the top and bottom bits, so I used
155rotates.
156-------------------------------------------------------------------------------
157*/
158#define mix(a,b,c) \
159{ \
160  a -= c;  a ^= rot(c, 4);  c += b; \
161  b -= a;  b ^= rot(a, 6);  a += c; \
162  c -= b;  c ^= rot(b, 8);  b += a; \
163  a -= c;  a ^= rot(c,16);  c += b; \
164  b -= a;  b ^= rot(a,19);  a += c; \
165  c -= b;  c ^= rot(b, 4);  b += a; \
166}
167
168/*
169-------------------------------------------------------------------------------
170final -- final mixing of 3 32-bit values (a,b,c) into c
171
172Pairs of (a,b,c) values differing in only a few bits will usually
173produce values of c that look totally different.  This was tested for
174* pairs that differed by one bit, by two bits, in any combination
175  of top bits of (a,b,c), or in any combination of bottom bits of
176  (a,b,c).
177* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
178  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
179  is commonly produced by subtraction) look like a single 1-bit
180  difference.
181* the base values were pseudorandom, all zero but one bit set, or
182  all zero plus a counter that starts at zero.
183
184These constants passed:
185 14 11 25 16 4 14 24
186 12 14 25 16 4 14 24
187and these came close:
188  4  8 15 26 3 22 24
189 10  8 15 26 3 22 24
190 11  8 15 26 3 22 24
191-------------------------------------------------------------------------------
192*/
193#define final(a,b,c) \
194{ \
195  c ^= b; c -= rot(b,14); \
196  a ^= c; a -= rot(c,11); \
197  b ^= a; b -= rot(a,25); \
198  c ^= b; c -= rot(b,16); \
199  a ^= c; a -= rot(c,4);  \
200  b ^= a; b -= rot(a,14); \
201  c ^= b; c -= rot(b,24); \
202}
203
204
205/*
206-------------------------------------------------------------------------------
207hashlittle() -- hash a variable-length key into a 32-bit value
208  k       : the key (the unaligned variable-length array of bytes)
209  length  : the length of the key, counting by bytes
210  initval : can be any 4-byte value
211Returns a 32-bit value.  Every bit of the key affects every bit of
212the return value.  Two keys differing by one or two bits will have
213totally different hash values.
214
215The best hash table sizes are powers of 2.  There is no need to do
216mod a prime (mod is sooo slow!).  If you need less than 32 bits,
217use a bitmask.  For example, if you need only 10 bits, do
218  h = (h & hashmask(10));
219In which case, the hash table should have hashsize(10) elements.
220
221If you are hashing n strings (uint8_t **)k, do it like this:
222  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
223
224By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
225code any way you wish, private, educational, or commercial.  It's free.
226
227Use for hash table lookup, or anything where one collision in 2^^32 is
228acceptable.  Do NOT use for cryptographic purposes.
229-------------------------------------------------------------------------------
230*/
231
232static uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
233{
234  uint32_t a,b,c;                                          /* internal state */
235  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
236
237  /* Set up the internal state */
238  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
239
240  u.ptr = key;
241  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
242    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
243
244    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
245    while (length > 12)
246    {
247      a += k[0];
248      b += k[1];
249      c += k[2];
250      mix(a,b,c);
251      length -= 12;
252      k += 3;
253    }
254
255    /*----------------------------- handle the last (probably partial) block */
256    /*
257     * "k[2]&0xffffff" actually reads beyond the end of the string, but
258     * then masks off the part it's not allowed to read.  Because the
259     * string is aligned, the masked-off tail is in the same word as the
260     * rest of the string.  Every machine with memory protection I've seen
261     * does it on word boundaries, so is OK with this.  But VALGRIND will
262     * still catch it and complain.  The masking trick does make the hash
263     * noticably faster for short strings (like English words).
264     */
265#ifndef VALGRIND
266
267    switch(length)
268    {
269    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
270    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
271    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
272    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
273    case 8 : b+=k[1]; a+=k[0]; break;
274    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
275    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
276    case 5 : b+=k[1]&0xff; a+=k[0]; break;
277    case 4 : a+=k[0]; break;
278    case 3 : a+=k[0]&0xffffff; break;
279    case 2 : a+=k[0]&0xffff; break;
280    case 1 : a+=k[0]&0xff; break;
281    case 0 : return c;              /* zero length strings require no mixing */
282    }
283
284#else /* make valgrind happy */
285
286    const uint8_t  *k8 = (const uint8_t *)k;
287    switch(length)
288    {
289    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
290    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
291    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
292    case 9 : c+=k8[8];                   /* fall through */
293    case 8 : b+=k[1]; a+=k[0]; break;
294    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
295    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
296    case 5 : b+=k8[4];                   /* fall through */
297    case 4 : a+=k[0]; break;
298    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
299    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
300    case 1 : a+=k8[0]; break;
301    case 0 : return c;
302    }
303
304#endif /* !valgrind */
305
306  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
307    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
308    const uint8_t  *k8;
309
310    /*--------------- all but last block: aligned reads and different mixing */
311    while (length > 12)
312    {
313      a += k[0] + (((uint32_t)k[1])<<16);
314      b += k[2] + (((uint32_t)k[3])<<16);
315      c += k[4] + (((uint32_t)k[5])<<16);
316      mix(a,b,c);
317      length -= 12;
318      k += 6;
319    }
320
321    /*----------------------------- handle the last (probably partial) block */
322    k8 = (const uint8_t *)k;
323    switch(length)
324    {
325    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
326             b+=k[2]+(((uint32_t)k[3])<<16);
327             a+=k[0]+(((uint32_t)k[1])<<16);
328             break;
329    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
330    case 10: c+=k[4];
331             b+=k[2]+(((uint32_t)k[3])<<16);
332             a+=k[0]+(((uint32_t)k[1])<<16);
333             break;
334    case 9 : c+=k8[8];                      /* fall through */
335    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
336             a+=k[0]+(((uint32_t)k[1])<<16);
337             break;
338    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
339    case 6 : b+=k[2];
340             a+=k[0]+(((uint32_t)k[1])<<16);
341             break;
342    case 5 : b+=k8[4];                      /* fall through */
343    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
344             break;
345    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
346    case 2 : a+=k[0];
347             break;
348    case 1 : a+=k8[0];
349             break;
350    case 0 : return c;                     /* zero length requires no mixing */
351    }
352
353  } else {                        /* need to read the key one byte at a time */
354    const uint8_t *k = (const uint8_t *)key;
355
356    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
357    while (length > 12)
358    {
359      a += k[0];
360      a += ((uint32_t)k[1])<<8;
361      a += ((uint32_t)k[2])<<16;
362      a += ((uint32_t)k[3])<<24;
363      b += k[4];
364      b += ((uint32_t)k[5])<<8;
365      b += ((uint32_t)k[6])<<16;
366      b += ((uint32_t)k[7])<<24;
367      c += k[8];
368      c += ((uint32_t)k[9])<<8;
369      c += ((uint32_t)k[10])<<16;
370      c += ((uint32_t)k[11])<<24;
371      mix(a,b,c);
372      length -= 12;
373      k += 12;
374    }
375
376    /*-------------------------------- last block: affect all 32 bits of (c) */
377    switch(length)                   /* all the case statements fall through */
378    {
379    case 12: c+=((uint32_t)k[11])<<24;
380    case 11: c+=((uint32_t)k[10])<<16;
381    case 10: c+=((uint32_t)k[9])<<8;
382    case 9 : c+=k[8];
383    case 8 : b+=((uint32_t)k[7])<<24;
384    case 7 : b+=((uint32_t)k[6])<<16;
385    case 6 : b+=((uint32_t)k[5])<<8;
386    case 5 : b+=k[4];
387    case 4 : a+=((uint32_t)k[3])<<24;
388    case 3 : a+=((uint32_t)k[2])<<16;
389    case 2 : a+=((uint32_t)k[1])<<8;
390    case 1 : a+=k[0];
391             break;
392    case 0 : return c;
393    }
394  }
395
396  final(a,b,c);
397  return c;
398}
399
400unsigned long lh_char_hash(const void *k)
401{
402	static volatile int random_seed = -1;
403
404	if (random_seed == -1) {
405		int seed;
406		/* we can't use -1 as it is the unitialized sentinel */
407		while ((seed = json_c_get_random_seed()) == -1);
408#if defined __GNUC__ && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
409		__sync_val_compare_and_swap(&random_seed, -1, seed);
410#elif defined _MSC_VER
411		InterlockedCompareExchange(&random_seed, seed, -1);
412#else
413#warning "racy random seed initializtion if used by multiple threads"
414		random_seed = seed; /* potentially racy */
415#endif
416	}
417
418	return hashlittle((const char*)k, strlen((const char*)k), random_seed);
419}
420
421int lh_char_equal(const void *k1, const void *k2)
422{
423	return (strcmp((const char*)k1, (const char*)k2) == 0);
424}
425
426struct lh_table* lh_table_new(int size, const char *name,
427			      lh_entry_free_fn *free_fn,
428			      lh_hash_fn *hash_fn,
429			      lh_equal_fn *equal_fn)
430{
431	int i;
432	struct lh_table *t;
433
434	t = (struct lh_table*)calloc(1, sizeof(struct lh_table));
435	if(!t) lh_abort("lh_table_new: calloc failed\n");
436	t->count = 0;
437	t->size = size;
438	t->name = name;
439	t->table = (struct lh_entry*)calloc(size, sizeof(struct lh_entry));
440	if(!t->table) lh_abort("lh_table_new: calloc failed\n");
441	t->free_fn = free_fn;
442	t->hash_fn = hash_fn;
443	t->equal_fn = equal_fn;
444	for(i = 0; i < size; i++) t->table[i].k = LH_EMPTY;
445	return t;
446}
447
448struct lh_table* lh_kchar_table_new(int size, const char *name,
449				    lh_entry_free_fn *free_fn)
450{
451	return lh_table_new(size, name, free_fn, lh_char_hash, lh_char_equal);
452}
453
454struct lh_table* lh_kptr_table_new(int size, const char *name,
455				   lh_entry_free_fn *free_fn)
456{
457	return lh_table_new(size, name, free_fn, lh_ptr_hash, lh_ptr_equal);
458}
459
460void lh_table_resize(struct lh_table *t, int new_size)
461{
462	struct lh_table *new_t;
463	struct lh_entry *ent;
464
465	new_t = lh_table_new(new_size, t->name, NULL, t->hash_fn, t->equal_fn);
466	ent = t->head;
467	while(ent) {
468		lh_table_insert(new_t, ent->k, ent->v);
469		ent = ent->next;
470	}
471	free(t->table);
472	t->table = new_t->table;
473	t->size = new_size;
474	t->head = new_t->head;
475	t->tail = new_t->tail;
476	t->resizes++;
477	free(new_t);
478}
479
480void lh_table_free(struct lh_table *t)
481{
482	struct lh_entry *c;
483	for(c = t->head; c != NULL; c = c->next) {
484		if(t->free_fn) {
485			t->free_fn(c);
486		}
487	}
488	free(t->table);
489	free(t);
490}
491
492
493int lh_table_insert(struct lh_table *t, void *k, const void *v)
494{
495	unsigned long h, n;
496
497	t->inserts++;
498	if(t->count >= t->size * LH_LOAD_FACTOR) lh_table_resize(t, t->size * 2);
499
500	h = t->hash_fn(k);
501	n = h % t->size;
502
503	while( 1 ) {
504		if(t->table[n].k == LH_EMPTY || t->table[n].k == LH_FREED) break;
505		t->collisions++;
506		if ((int)++n == t->size) n = 0;
507	}
508
509	t->table[n].k = k;
510	t->table[n].v = v;
511	t->count++;
512
513	if(t->head == NULL) {
514		t->head = t->tail = &t->table[n];
515		t->table[n].next = t->table[n].prev = NULL;
516	} else {
517		t->tail->next = &t->table[n];
518		t->table[n].prev = t->tail;
519		t->table[n].next = NULL;
520		t->tail = &t->table[n];
521	}
522
523	return 0;
524}
525
526
527struct lh_entry* lh_table_lookup_entry(struct lh_table *t, const void *k)
528{
529	unsigned long h = t->hash_fn(k);
530	unsigned long n = h % t->size;
531	int count = 0;
532
533	t->lookups++;
534	while( count < t->size ) {
535		if(t->table[n].k == LH_EMPTY) return NULL;
536		if(t->table[n].k != LH_FREED &&
537		   t->equal_fn(t->table[n].k, k)) return &t->table[n];
538		if ((int)++n == t->size) n = 0;
539		count++;
540	}
541	return NULL;
542}
543
544
545const void* lh_table_lookup(struct lh_table *t, const void *k)
546{
547	void *result;
548	lh_table_lookup_ex(t, k, &result);
549	return result;
550}
551
552json_bool lh_table_lookup_ex(struct lh_table* t, const void* k, void **v)
553{
554	struct lh_entry *e = lh_table_lookup_entry(t, k);
555	if (e != NULL) {
556		if (v != NULL) *v = (void *)e->v;
557		return TRUE; /* key found */
558	}
559	if (v != NULL) *v = NULL;
560	return FALSE; /* key not found */
561}
562
563int lh_table_delete_entry(struct lh_table *t, struct lh_entry *e)
564{
565	ptrdiff_t n = (ptrdiff_t)(e - t->table); /* CAW: fixed to be 64bit nice, still need the crazy negative case... */
566
567	/* CAW: this is bad, really bad, maybe stack goes other direction on this machine... */
568	if(n < 0) { return -2; }
569
570	if(t->table[n].k == LH_EMPTY || t->table[n].k == LH_FREED) return -1;
571	t->count--;
572	if(t->free_fn) t->free_fn(e);
573	t->table[n].v = NULL;
574	t->table[n].k = LH_FREED;
575	if(t->tail == &t->table[n] && t->head == &t->table[n]) {
576		t->head = t->tail = NULL;
577	} else if (t->head == &t->table[n]) {
578		t->head->next->prev = NULL;
579		t->head = t->head->next;
580	} else if (t->tail == &t->table[n]) {
581		t->tail->prev->next = NULL;
582		t->tail = t->tail->prev;
583	} else {
584		t->table[n].prev->next = t->table[n].next;
585		t->table[n].next->prev = t->table[n].prev;
586	}
587	t->table[n].next = t->table[n].prev = NULL;
588	return 0;
589}
590
591
592int lh_table_delete(struct lh_table *t, const void *k)
593{
594	struct lh_entry *e = lh_table_lookup_entry(t, k);
595	if(!e) return -1;
596	return lh_table_delete_entry(t, e);
597}
598
599int lh_table_length(struct lh_table *t)
600{
601	return t->count;
602}
603