lookup3.c revision 307729
1/*
2  February 2013(Wouter) patch defines for BSD endianness, from Brad Smith.
3  January 2012(Wouter) added randomised initial value, fallout from 28c3.
4  March 2007(Wouter) adapted from lookup3.c original, add config.h include.
5     added #ifdef VALGRIND to remove 298,384,660 'unused variable k8' warnings.
6     added include of lookup3.h to check definitions match declarations.
7     removed include of stdint - config.h takes care of platform independence.
8  url http://burtleburtle.net/bob/hash/index.html.
9*/
10/*
11-------------------------------------------------------------------------------
12lookup3.c, by Bob Jenkins, May 2006, Public Domain.
13
14These are functions for producing 32-bit hashes for hash table lookup.
15hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
16are externally useful functions.  Routines to test the hash are included
17if SELF_TEST is defined.  You can use this free for any purpose.  It's in
18the public domain.  It has no warranty.
19
20You probably want to use hashlittle().  hashlittle() and hashbig()
21hash byte arrays.  hashlittle() is is faster than hashbig() on
22little-endian machines.  Intel and AMD are little-endian machines.
23On second thought, you probably want hashlittle2(), which is identical to
24hashlittle() except it returns two 32-bit hashes for the price of one.
25You could implement hashbig2() if you wanted but I haven't bothered here.
26
27If you want to find a hash of, say, exactly 7 integers, do
28  a = i1;  b = i2;  c = i3;
29  mix(a,b,c);
30  a += i4; b += i5; c += i6;
31  mix(a,b,c);
32  a += i7;
33  final(a,b,c);
34then use c as the hash value.  If you have a variable length array of
354-byte integers to hash, use hashword().  If you have a byte array (like
36a character string), use hashlittle().  If you have several byte arrays, or
37a mix of things, see the comments above hashlittle().
38
39Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
40then mix those integers.  This is fast (you can do a lot more thorough
41mixing with 12*3 instructions on 3 integers than you can with 3 instructions
42on 1 byte), but shoehorning those bytes into integers efficiently is messy.
43-------------------------------------------------------------------------------
44*/
45/*#define SELF_TEST 1*/
46
47#include "config.h"
48#include "util/storage/lookup3.h"
49#include <stdio.h>      /* defines printf for tests */
50#include <time.h>       /* defines time_t for timings in the test */
51/*#include <stdint.h>     defines uint32_t etc  (from config.h) */
52#include <sys/param.h>  /* attempt to define endianness */
53#ifdef HAVE_SYS_TYPES_H
54# include <sys/types.h> /* attempt to define endianness (solaris) */
55#endif
56#if defined(linux) || defined(__OpenBSD__)
57#  ifdef HAVE_ENDIAN_H
58#    include <endian.h>    /* attempt to define endianness */
59#  else
60#    include <machine/endian.h> /* on older OpenBSD */
61#  endif
62#endif
63#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
64#include <sys/endian.h> /* attempt to define endianness */
65#endif
66
67/* random initial value */
68static uint32_t raninit = (uint32_t)0xdeadbeef;
69
70void
71hash_set_raninit(uint32_t v)
72{
73	raninit = v;
74}
75
76/*
77 * My best guess at if you are big-endian or little-endian.  This may
78 * need adjustment.
79 */
80#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
81     __BYTE_ORDER == __LITTLE_ENDIAN) || \
82    (defined(i386) || defined(__i386__) || defined(__i486__) || \
83     defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL) || defined(__x86))
84# define HASH_LITTLE_ENDIAN 1
85# define HASH_BIG_ENDIAN 0
86#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
87       __BYTE_ORDER == __BIG_ENDIAN) || \
88      (defined(sparc) || defined(__sparc) || defined(__sparc__) || defined(POWERPC) || defined(mc68000) || defined(sel))
89# define HASH_LITTLE_ENDIAN 0
90# define HASH_BIG_ENDIAN 1
91#elif defined(_MACHINE_ENDIAN_H_)
92/* test for machine_endian_h protects failure if some are empty strings */
93# if defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) && _BYTE_ORDER == _BIG_ENDIAN
94#  define HASH_LITTLE_ENDIAN 0
95#  define HASH_BIG_ENDIAN 1
96# endif
97# if defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && _BYTE_ORDER == _LITTLE_ENDIAN
98#  define HASH_LITTLE_ENDIAN 1
99#  define HASH_BIG_ENDIAN 0
100# endif /* _MACHINE_ENDIAN_H_ */
101#else
102# define HASH_LITTLE_ENDIAN 0
103# define HASH_BIG_ENDIAN 0
104#endif
105
106#define hashsize(n) ((uint32_t)1<<(n))
107#define hashmask(n) (hashsize(n)-1)
108#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
109
110/*
111-------------------------------------------------------------------------------
112mix -- mix 3 32-bit values reversibly.
113
114This is reversible, so any information in (a,b,c) before mix() is
115still in (a,b,c) after mix().
116
117If four pairs of (a,b,c) inputs are run through mix(), or through
118mix() in reverse, there are at least 32 bits of the output that
119are sometimes the same for one pair and different for another pair.
120This was tested for:
121* pairs that differed by one bit, by two bits, in any combination
122  of top bits of (a,b,c), or in any combination of bottom bits of
123  (a,b,c).
124* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
125  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
126  is commonly produced by subtraction) look like a single 1-bit
127  difference.
128* the base values were pseudorandom, all zero but one bit set, or
129  all zero plus a counter that starts at zero.
130
131Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
132satisfy this are
133    4  6  8 16 19  4
134    9 15  3 18 27 15
135   14  9  3  7 17  3
136Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
137for "differ" defined as + with a one-bit base and a two-bit delta.  I
138used http://burtleburtle.net/bob/hash/avalanche.html to choose
139the operations, constants, and arrangements of the variables.
140
141This does not achieve avalanche.  There are input bits of (a,b,c)
142that fail to affect some output bits of (a,b,c), especially of a.  The
143most thoroughly mixed value is c, but it doesn't really even achieve
144avalanche in c.
145
146This allows some parallelism.  Read-after-writes are good at doubling
147the number of bits affected, so the goal of mixing pulls in the opposite
148direction as the goal of parallelism.  I did what I could.  Rotates
149seem to cost as much as shifts on every machine I could lay my hands
150on, and rotates are much kinder to the top and bottom bits, so I used
151rotates.
152-------------------------------------------------------------------------------
153*/
154#define mix(a,b,c) \
155{ \
156  a -= c;  a ^= rot(c, 4);  c += b; \
157  b -= a;  b ^= rot(a, 6);  a += c; \
158  c -= b;  c ^= rot(b, 8);  b += a; \
159  a -= c;  a ^= rot(c,16);  c += b; \
160  b -= a;  b ^= rot(a,19);  a += c; \
161  c -= b;  c ^= rot(b, 4);  b += a; \
162}
163
164/*
165-------------------------------------------------------------------------------
166final -- final mixing of 3 32-bit values (a,b,c) into c
167
168Pairs of (a,b,c) values differing in only a few bits will usually
169produce values of c that look totally different.  This was tested for
170* pairs that differed by one bit, by two bits, in any combination
171  of top bits of (a,b,c), or in any combination of bottom bits of
172  (a,b,c).
173* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
174  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
175  is commonly produced by subtraction) look like a single 1-bit
176  difference.
177* the base values were pseudorandom, all zero but one bit set, or
178  all zero plus a counter that starts at zero.
179
180These constants passed:
181 14 11 25 16 4 14 24
182 12 14 25 16 4 14 24
183and these came close:
184  4  8 15 26 3 22 24
185 10  8 15 26 3 22 24
186 11  8 15 26 3 22 24
187-------------------------------------------------------------------------------
188*/
189#define final(a,b,c) \
190{ \
191  c ^= b; c -= rot(b,14); \
192  a ^= c; a -= rot(c,11); \
193  b ^= a; b -= rot(a,25); \
194  c ^= b; c -= rot(b,16); \
195  a ^= c; a -= rot(c,4);  \
196  b ^= a; b -= rot(a,14); \
197  c ^= b; c -= rot(b,24); \
198}
199
200/*
201--------------------------------------------------------------------
202 This works on all machines.  To be useful, it requires
203 -- that the key be an array of uint32_t's, and
204 -- that the length be the number of uint32_t's in the key
205
206 The function hashword() is identical to hashlittle() on little-endian
207 machines, and identical to hashbig() on big-endian machines,
208 except that the length has to be measured in uint32_ts rather than in
209 bytes.  hashlittle() is more complicated than hashword() only because
210 hashlittle() has to dance around fitting the key bytes into registers.
211--------------------------------------------------------------------
212*/
213uint32_t hashword(
214const uint32_t *k,                   /* the key, an array of uint32_t values */
215size_t          length,               /* the length of the key, in uint32_ts */
216uint32_t        initval)         /* the previous hash, or an arbitrary value */
217{
218  uint32_t a,b,c;
219
220  /* Set up the internal state */
221  a = b = c = raninit + (((uint32_t)length)<<2) + initval;
222
223  /*------------------------------------------------- handle most of the key */
224  while (length > 3)
225  {
226    a += k[0];
227    b += k[1];
228    c += k[2];
229    mix(a,b,c);
230    length -= 3;
231    k += 3;
232  }
233
234  /*------------------------------------------- handle the last 3 uint32_t's */
235  switch(length)                     /* all the case statements fall through */
236  {
237  case 3 : c+=k[2];
238  case 2 : b+=k[1];
239  case 1 : a+=k[0];
240    final(a,b,c);
241  case 0:     /* case 0: nothing left to add */
242    break;
243  }
244  /*------------------------------------------------------ report the result */
245  return c;
246}
247
248
249#ifdef SELF_TEST
250
251/*
252--------------------------------------------------------------------
253hashword2() -- same as hashword(), but take two seeds and return two
25432-bit values.  pc and pb must both be nonnull, and *pc and *pb must
255both be initialized with seeds.  If you pass in (*pb)==0, the output
256(*pc) will be the same as the return value from hashword().
257--------------------------------------------------------------------
258*/
259void hashword2 (
260const uint32_t *k,                   /* the key, an array of uint32_t values */
261size_t          length,               /* the length of the key, in uint32_ts */
262uint32_t       *pc,                      /* IN: seed OUT: primary hash value */
263uint32_t       *pb)               /* IN: more seed OUT: secondary hash value */
264{
265  uint32_t a,b,c;
266
267  /* Set up the internal state */
268  a = b = c = raninit + ((uint32_t)(length<<2)) + *pc;
269  c += *pb;
270
271  /*------------------------------------------------- handle most of the key */
272  while (length > 3)
273  {
274    a += k[0];
275    b += k[1];
276    c += k[2];
277    mix(a,b,c);
278    length -= 3;
279    k += 3;
280  }
281
282  /*------------------------------------------- handle the last 3 uint32_t's */
283  switch(length)                     /* all the case statements fall through */
284  {
285  case 3 : c+=k[2];
286  case 2 : b+=k[1];
287  case 1 : a+=k[0];
288    final(a,b,c);
289  case 0:     /* case 0: nothing left to add */
290    break;
291  }
292  /*------------------------------------------------------ report the result */
293  *pc=c; *pb=b;
294}
295
296#endif /* SELF_TEST */
297
298/*
299-------------------------------------------------------------------------------
300hashlittle() -- hash a variable-length key into a 32-bit value
301  k       : the key (the unaligned variable-length array of bytes)
302  length  : the length of the key, counting by bytes
303  initval : can be any 4-byte value
304Returns a 32-bit value.  Every bit of the key affects every bit of
305the return value.  Two keys differing by one or two bits will have
306totally different hash values.
307
308The best hash table sizes are powers of 2.  There is no need to do
309mod a prime (mod is sooo slow!).  If you need less than 32 bits,
310use a bitmask.  For example, if you need only 10 bits, do
311  h = (h & hashmask(10));
312In which case, the hash table should have hashsize(10) elements.
313
314If you are hashing n strings (uint8_t **)k, do it like this:
315  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
316
317By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
318code any way you wish, private, educational, or commercial.  It's free.
319
320Use for hash table lookup, or anything where one collision in 2^^32 is
321acceptable.  Do NOT use for cryptographic purposes.
322-------------------------------------------------------------------------------
323*/
324
325uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
326{
327  uint32_t a,b,c;                                          /* internal state */
328  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
329
330  /* Set up the internal state */
331  a = b = c = raninit + ((uint32_t)length) + initval;
332
333  u.ptr = key;
334  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
335    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
336#ifdef VALGRIND
337    const uint8_t  *k8;
338#endif
339
340    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
341    while (length > 12)
342    {
343      a += k[0];
344      b += k[1];
345      c += k[2];
346      mix(a,b,c);
347      length -= 12;
348      k += 3;
349    }
350
351    /*----------------------------- handle the last (probably partial) block */
352    /*
353     * "k[2]&0xffffff" actually reads beyond the end of the string, but
354     * then masks off the part it's not allowed to read.  Because the
355     * string is aligned, the masked-off tail is in the same word as the
356     * rest of the string.  Every machine with memory protection I've seen
357     * does it on word boundaries, so is OK with this.  But VALGRIND will
358     * still catch it and complain.  The masking trick does make the hash
359     * noticeably faster for short strings (like English words).
360     */
361#ifndef VALGRIND
362
363    switch(length)
364    {
365    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
366    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
367    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
368    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
369    case 8 : b+=k[1]; a+=k[0]; break;
370    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
371    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
372    case 5 : b+=k[1]&0xff; a+=k[0]; break;
373    case 4 : a+=k[0]; break;
374    case 3 : a+=k[0]&0xffffff; break;
375    case 2 : a+=k[0]&0xffff; break;
376    case 1 : a+=k[0]&0xff; break;
377    case 0 : return c;              /* zero length strings require no mixing */
378    }
379
380#else /* make valgrind happy */
381
382    k8 = (const uint8_t *)k;
383    switch(length)
384    {
385    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
386    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
387    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
388    case 9 : c+=k8[8];                   /* fall through */
389    case 8 : b+=k[1]; a+=k[0]; break;
390    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
391    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
392    case 5 : b+=k8[4];                   /* fall through */
393    case 4 : a+=k[0]; break;
394    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
395    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
396    case 1 : a+=k8[0]; break;
397    case 0 : return c;
398    }
399
400#endif /* !valgrind */
401
402  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
403    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
404    const uint8_t  *k8;
405
406    /*--------------- all but last block: aligned reads and different mixing */
407    while (length > 12)
408    {
409      a += k[0] + (((uint32_t)k[1])<<16);
410      b += k[2] + (((uint32_t)k[3])<<16);
411      c += k[4] + (((uint32_t)k[5])<<16);
412      mix(a,b,c);
413      length -= 12;
414      k += 6;
415    }
416
417    /*----------------------------- handle the last (probably partial) block */
418    k8 = (const uint8_t *)k;
419    switch(length)
420    {
421    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
422             b+=k[2]+(((uint32_t)k[3])<<16);
423             a+=k[0]+(((uint32_t)k[1])<<16);
424             break;
425    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
426    case 10: c+=k[4];
427             b+=k[2]+(((uint32_t)k[3])<<16);
428             a+=k[0]+(((uint32_t)k[1])<<16);
429             break;
430    case 9 : c+=k8[8];                      /* fall through */
431    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
432             a+=k[0]+(((uint32_t)k[1])<<16);
433             break;
434    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
435    case 6 : b+=k[2];
436             a+=k[0]+(((uint32_t)k[1])<<16);
437             break;
438    case 5 : b+=k8[4];                      /* fall through */
439    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
440             break;
441    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
442    case 2 : a+=k[0];
443             break;
444    case 1 : a+=k8[0];
445             break;
446    case 0 : return c;                     /* zero length requires no mixing */
447    }
448
449  } else {                        /* need to read the key one byte at a time */
450    const uint8_t *k = (const uint8_t *)key;
451
452    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
453    while (length > 12)
454    {
455      a += k[0];
456      a += ((uint32_t)k[1])<<8;
457      a += ((uint32_t)k[2])<<16;
458      a += ((uint32_t)k[3])<<24;
459      b += k[4];
460      b += ((uint32_t)k[5])<<8;
461      b += ((uint32_t)k[6])<<16;
462      b += ((uint32_t)k[7])<<24;
463      c += k[8];
464      c += ((uint32_t)k[9])<<8;
465      c += ((uint32_t)k[10])<<16;
466      c += ((uint32_t)k[11])<<24;
467      mix(a,b,c);
468      length -= 12;
469      k += 12;
470    }
471
472    /*-------------------------------- last block: affect all 32 bits of (c) */
473    switch(length)                   /* all the case statements fall through */
474    {
475    case 12: c+=((uint32_t)k[11])<<24;
476    case 11: c+=((uint32_t)k[10])<<16;
477    case 10: c+=((uint32_t)k[9])<<8;
478    case 9 : c+=k[8];
479    case 8 : b+=((uint32_t)k[7])<<24;
480    case 7 : b+=((uint32_t)k[6])<<16;
481    case 6 : b+=((uint32_t)k[5])<<8;
482    case 5 : b+=k[4];
483    case 4 : a+=((uint32_t)k[3])<<24;
484    case 3 : a+=((uint32_t)k[2])<<16;
485    case 2 : a+=((uint32_t)k[1])<<8;
486    case 1 : a+=k[0];
487             break;
488    case 0 : return c;
489    }
490  }
491
492  final(a,b,c);
493  return c;
494}
495
496#ifdef SELF_TEST
497
498/*
499 * hashlittle2: return 2 32-bit hash values
500 *
501 * This is identical to hashlittle(), except it returns two 32-bit hash
502 * values instead of just one.  This is good enough for hash table
503 * lookup with 2^^64 buckets, or if you want a second hash if you're not
504 * happy with the first, or if you want a probably-unique 64-bit ID for
505 * the key.  *pc is better mixed than *pb, so use *pc first.  If you want
506 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
507 */
508void hashlittle2(
509  const void *key,       /* the key to hash */
510  size_t      length,    /* length of the key */
511  uint32_t   *pc,        /* IN: primary initval, OUT: primary hash */
512  uint32_t   *pb)        /* IN: secondary initval, OUT: secondary hash */
513{
514  uint32_t a,b,c;                                          /* internal state */
515  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
516
517  /* Set up the internal state */
518  a = b = c = raninit + ((uint32_t)length) + *pc;
519  c += *pb;
520
521  u.ptr = key;
522  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
523    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
524#ifdef VALGRIND
525    const uint8_t  *k8;
526#endif
527
528    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
529    while (length > 12)
530    {
531      a += k[0];
532      b += k[1];
533      c += k[2];
534      mix(a,b,c);
535      length -= 12;
536      k += 3;
537    }
538
539    /*----------------------------- handle the last (probably partial) block */
540    /*
541     * "k[2]&0xffffff" actually reads beyond the end of the string, but
542     * then masks off the part it's not allowed to read.  Because the
543     * string is aligned, the masked-off tail is in the same word as the
544     * rest of the string.  Every machine with memory protection I've seen
545     * does it on word boundaries, so is OK with this.  But VALGRIND will
546     * still catch it and complain.  The masking trick does make the hash
547     * noticeably faster for short strings (like English words).
548     */
549#ifndef VALGRIND
550
551    switch(length)
552    {
553    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
554    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
555    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
556    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
557    case 8 : b+=k[1]; a+=k[0]; break;
558    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
559    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
560    case 5 : b+=k[1]&0xff; a+=k[0]; break;
561    case 4 : a+=k[0]; break;
562    case 3 : a+=k[0]&0xffffff; break;
563    case 2 : a+=k[0]&0xffff; break;
564    case 1 : a+=k[0]&0xff; break;
565    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
566    }
567
568#else /* make valgrind happy */
569
570    k8 = (const uint8_t *)k;
571    switch(length)
572    {
573    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
574    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
575    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
576    case 9 : c+=k8[8];                   /* fall through */
577    case 8 : b+=k[1]; a+=k[0]; break;
578    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
579    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
580    case 5 : b+=k8[4];                   /* fall through */
581    case 4 : a+=k[0]; break;
582    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
583    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
584    case 1 : a+=k8[0]; break;
585    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
586    }
587
588#endif /* !valgrind */
589
590  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
591    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
592    const uint8_t  *k8;
593
594    /*--------------- all but last block: aligned reads and different mixing */
595    while (length > 12)
596    {
597      a += k[0] + (((uint32_t)k[1])<<16);
598      b += k[2] + (((uint32_t)k[3])<<16);
599      c += k[4] + (((uint32_t)k[5])<<16);
600      mix(a,b,c);
601      length -= 12;
602      k += 6;
603    }
604
605    /*----------------------------- handle the last (probably partial) block */
606    k8 = (const uint8_t *)k;
607    switch(length)
608    {
609    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
610             b+=k[2]+(((uint32_t)k[3])<<16);
611             a+=k[0]+(((uint32_t)k[1])<<16);
612             break;
613    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
614    case 10: c+=k[4];
615             b+=k[2]+(((uint32_t)k[3])<<16);
616             a+=k[0]+(((uint32_t)k[1])<<16);
617             break;
618    case 9 : c+=k8[8];                      /* fall through */
619    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
620             a+=k[0]+(((uint32_t)k[1])<<16);
621             break;
622    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
623    case 6 : b+=k[2];
624             a+=k[0]+(((uint32_t)k[1])<<16);
625             break;
626    case 5 : b+=k8[4];                      /* fall through */
627    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
628             break;
629    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
630    case 2 : a+=k[0];
631             break;
632    case 1 : a+=k8[0];
633             break;
634    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
635    }
636
637  } else {                        /* need to read the key one byte at a time */
638    const uint8_t *k = (const uint8_t *)key;
639
640    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
641    while (length > 12)
642    {
643      a += k[0];
644      a += ((uint32_t)k[1])<<8;
645      a += ((uint32_t)k[2])<<16;
646      a += ((uint32_t)k[3])<<24;
647      b += k[4];
648      b += ((uint32_t)k[5])<<8;
649      b += ((uint32_t)k[6])<<16;
650      b += ((uint32_t)k[7])<<24;
651      c += k[8];
652      c += ((uint32_t)k[9])<<8;
653      c += ((uint32_t)k[10])<<16;
654      c += ((uint32_t)k[11])<<24;
655      mix(a,b,c);
656      length -= 12;
657      k += 12;
658    }
659
660    /*-------------------------------- last block: affect all 32 bits of (c) */
661    switch(length)                   /* all the case statements fall through */
662    {
663    case 12: c+=((uint32_t)k[11])<<24;
664    case 11: c+=((uint32_t)k[10])<<16;
665    case 10: c+=((uint32_t)k[9])<<8;
666    case 9 : c+=k[8];
667    case 8 : b+=((uint32_t)k[7])<<24;
668    case 7 : b+=((uint32_t)k[6])<<16;
669    case 6 : b+=((uint32_t)k[5])<<8;
670    case 5 : b+=k[4];
671    case 4 : a+=((uint32_t)k[3])<<24;
672    case 3 : a+=((uint32_t)k[2])<<16;
673    case 2 : a+=((uint32_t)k[1])<<8;
674    case 1 : a+=k[0];
675             break;
676    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
677    }
678  }
679
680  final(a,b,c);
681  *pc=c; *pb=b;
682}
683
684#endif /* SELF_TEST */
685
686#if 0	/* currently not used */
687
688/*
689 * hashbig():
690 * This is the same as hashword() on big-endian machines.  It is different
691 * from hashlittle() on all machines.  hashbig() takes advantage of
692 * big-endian byte ordering.
693 */
694uint32_t hashbig( const void *key, size_t length, uint32_t initval)
695{
696  uint32_t a,b,c;
697  union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
698
699  /* Set up the internal state */
700  a = b = c = raninit + ((uint32_t)length) + initval;
701
702  u.ptr = key;
703  if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
704    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
705#ifdef VALGRIND
706    const uint8_t  *k8;
707#endif
708
709    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
710    while (length > 12)
711    {
712      a += k[0];
713      b += k[1];
714      c += k[2];
715      mix(a,b,c);
716      length -= 12;
717      k += 3;
718    }
719
720    /*----------------------------- handle the last (probably partial) block */
721    /*
722     * "k[2]<<8" actually reads beyond the end of the string, but
723     * then shifts out the part it's not allowed to read.  Because the
724     * string is aligned, the illegal read is in the same word as the
725     * rest of the string.  Every machine with memory protection I've seen
726     * does it on word boundaries, so is OK with this.  But VALGRIND will
727     * still catch it and complain.  The masking trick does make the hash
728     * noticeably faster for short strings (like English words).
729     */
730#ifndef VALGRIND
731
732    switch(length)
733    {
734    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
735    case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
736    case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
737    case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
738    case 8 : b+=k[1]; a+=k[0]; break;
739    case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
740    case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
741    case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
742    case 4 : a+=k[0]; break;
743    case 3 : a+=k[0]&0xffffff00; break;
744    case 2 : a+=k[0]&0xffff0000; break;
745    case 1 : a+=k[0]&0xff000000; break;
746    case 0 : return c;              /* zero length strings require no mixing */
747    }
748
749#else  /* make valgrind happy */
750
751    k8 = (const uint8_t *)k;
752    switch(length)                   /* all the case statements fall through */
753    {
754    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
755    case 11: c+=((uint32_t)k8[10])<<8;  /* fall through */
756    case 10: c+=((uint32_t)k8[9])<<16;  /* fall through */
757    case 9 : c+=((uint32_t)k8[8])<<24;  /* fall through */
758    case 8 : b+=k[1]; a+=k[0]; break;
759    case 7 : b+=((uint32_t)k8[6])<<8;   /* fall through */
760    case 6 : b+=((uint32_t)k8[5])<<16;  /* fall through */
761    case 5 : b+=((uint32_t)k8[4])<<24;  /* fall through */
762    case 4 : a+=k[0]; break;
763    case 3 : a+=((uint32_t)k8[2])<<8;   /* fall through */
764    case 2 : a+=((uint32_t)k8[1])<<16;  /* fall through */
765    case 1 : a+=((uint32_t)k8[0])<<24; break;
766    case 0 : return c;
767    }
768
769#endif /* !VALGRIND */
770
771  } else {                        /* need to read the key one byte at a time */
772    const uint8_t *k = (const uint8_t *)key;
773
774    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
775    while (length > 12)
776    {
777      a += ((uint32_t)k[0])<<24;
778      a += ((uint32_t)k[1])<<16;
779      a += ((uint32_t)k[2])<<8;
780      a += ((uint32_t)k[3]);
781      b += ((uint32_t)k[4])<<24;
782      b += ((uint32_t)k[5])<<16;
783      b += ((uint32_t)k[6])<<8;
784      b += ((uint32_t)k[7]);
785      c += ((uint32_t)k[8])<<24;
786      c += ((uint32_t)k[9])<<16;
787      c += ((uint32_t)k[10])<<8;
788      c += ((uint32_t)k[11]);
789      mix(a,b,c);
790      length -= 12;
791      k += 12;
792    }
793
794    /*-------------------------------- last block: affect all 32 bits of (c) */
795    switch(length)                   /* all the case statements fall through */
796    {
797    case 12: c+=k[11];
798    case 11: c+=((uint32_t)k[10])<<8;
799    case 10: c+=((uint32_t)k[9])<<16;
800    case 9 : c+=((uint32_t)k[8])<<24;
801    case 8 : b+=k[7];
802    case 7 : b+=((uint32_t)k[6])<<8;
803    case 6 : b+=((uint32_t)k[5])<<16;
804    case 5 : b+=((uint32_t)k[4])<<24;
805    case 4 : a+=k[3];
806    case 3 : a+=((uint32_t)k[2])<<8;
807    case 2 : a+=((uint32_t)k[1])<<16;
808    case 1 : a+=((uint32_t)k[0])<<24;
809             break;
810    case 0 : return c;
811    }
812  }
813
814  final(a,b,c);
815  return c;
816}
817
818#endif /* 0 == currently not used */
819
820#ifdef SELF_TEST
821
822/* used for timings */
823void driver1(void)
824{
825  uint8_t buf[256];
826  uint32_t i;
827  uint32_t h=0;
828  time_t a,z;
829
830  time(&a);
831  for (i=0; i<256; ++i) buf[i] = 'x';
832  for (i=0; i<1; ++i)
833  {
834    h = hashlittle(&buf[0],1,h);
835  }
836  time(&z);
837  if (z-a > 0) printf("time %d %.8x\n", z-a, h);
838}
839
840/* check that every input bit changes every output bit half the time */
841#define HASHSTATE 1
842#define HASHLEN   1
843#define MAXPAIR 60
844#define MAXLEN  70
845void driver2(void)
846{
847  uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
848  uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
849  uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
850  uint32_t x[HASHSTATE],y[HASHSTATE];
851  uint32_t hlen;
852
853  printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
854  for (hlen=0; hlen < MAXLEN; ++hlen)
855  {
856    z=0;
857    for (i=0; i<hlen; ++i)  /*----------------------- for each input byte, */
858    {
859      for (j=0; j<8; ++j)   /*------------------------ for each input bit, */
860      {
861	for (m=1; m<8; ++m) /*------------ for several possible initvals, */
862	{
863	  for (l=0; l<HASHSTATE; ++l)
864	    e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
865
866      	  /*---- check that every output bit is affected by that input bit */
867	  for (k=0; k<MAXPAIR; k+=2)
868	  {
869	    uint32_t finished=1;
870	    /* keys have one bit different */
871	    for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
872	    /* have a and b be two keys differing in only one bit */
873	    a[i] ^= (k<<j);
874	    a[i] ^= (k>>(8-j));
875	     c[0] = hashlittle(a, hlen, m);
876	    b[i] ^= ((k+1)<<j);
877	    b[i] ^= ((k+1)>>(8-j));
878	     d[0] = hashlittle(b, hlen, m);
879	    /* check every bit is 1, 0, set, and not set at least once */
880	    for (l=0; l<HASHSTATE; ++l)
881	    {
882	      e[l] &= (c[l]^d[l]);
883	      f[l] &= ~(c[l]^d[l]);
884	      g[l] &= c[l];
885	      h[l] &= ~c[l];
886	      x[l] &= d[l];
887	      y[l] &= ~d[l];
888	      if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
889	    }
890	    if (finished) break;
891	  }
892	  if (k>z) z=k;
893	  if (k==MAXPAIR)
894	  {
895	     printf("Some bit didn't change: ");
896	     printf("%.8x %.8x %.8x %.8x %.8x %.8x  ",
897	            e[0],f[0],g[0],h[0],x[0],y[0]);
898	     printf("i %d j %d m %d len %d\n", i, j, m, hlen);
899	  }
900	  if (z==MAXPAIR) goto done;
901	}
902      }
903    }
904   done:
905    if (z < MAXPAIR)
906    {
907      printf("Mix success  %2d bytes  %2d initvals  ",i,m);
908      printf("required  %d  trials\n", z/2);
909    }
910  }
911  printf("\n");
912}
913
914/* Check for reading beyond the end of the buffer and alignment problems */
915void driver3(void)
916{
917  uint8_t buf[MAXLEN+20], *b;
918  uint32_t len;
919  uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
920  uint32_t h;
921  uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
922  uint32_t i;
923  uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
924  uint32_t j;
925  uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
926  uint32_t ref,x,y;
927  uint8_t *p;
928
929  printf("Endianness.  These lines should all be the same (for values filled in):\n");
930  printf("%.8x                            %.8x                            %.8x\n",
931         hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
932         hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
933         hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
934  p = q;
935  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
936         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
937         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
938         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
939         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
940         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
941         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
942  p = &qq[1];
943  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
944         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
945         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
946         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
947         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
948         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
949         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
950  p = &qqq[2];
951  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
952         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
953         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
954         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
955         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
956         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
957         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
958  p = &qqqq[3];
959  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
960         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
961         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
962         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
963         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
964         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
965         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
966  printf("\n");
967
968  /* check that hashlittle2 and hashlittle produce the same results */
969  i=47; j=0;
970  hashlittle2(q, sizeof(q), &i, &j);
971  if (hashlittle(q, sizeof(q), 47) != i)
972    printf("hashlittle2 and hashlittle mismatch\n");
973
974  /* check that hashword2 and hashword produce the same results */
975  len = raninit;
976  i=47, j=0;
977  hashword2(&len, 1, &i, &j);
978  if (hashword(&len, 1, 47) != i)
979    printf("hashword2 and hashword mismatch %x %x\n",
980	   i, hashword(&len, 1, 47));
981
982  /* check hashlittle doesn't read before or after the ends of the string */
983  for (h=0, b=buf+1; h<8; ++h, ++b)
984  {
985    for (i=0; i<MAXLEN; ++i)
986    {
987      len = i;
988      for (j=0; j<i; ++j) *(b+j)=0;
989
990      /* these should all be equal */
991      ref = hashlittle(b, len, (uint32_t)1);
992      *(b+i)=(uint8_t)~0;
993      *(b-1)=(uint8_t)~0;
994      x = hashlittle(b, len, (uint32_t)1);
995      y = hashlittle(b, len, (uint32_t)1);
996      if ((ref != x) || (ref != y))
997      {
998	printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
999               h, i);
1000      }
1001    }
1002  }
1003}
1004
1005/* check for problems with nulls */
1006 void driver4(void)
1007{
1008  uint8_t buf[1];
1009  uint32_t h,i,state[HASHSTATE];
1010
1011
1012  buf[0] = ~0;
1013  for (i=0; i<HASHSTATE; ++i) state[i] = 1;
1014  printf("These should all be different\n");
1015  for (i=0, h=0; i<8; ++i)
1016  {
1017    h = hashlittle(buf, 0, h);
1018    printf("%2ld  0-byte strings, hash is  %.8x\n", i, h);
1019  }
1020}
1021
1022
1023int main(void)
1024{
1025  driver1();   /* test that the key is hashed: used for timings */
1026  driver2();   /* test that whole key is hashed thoroughly */
1027  driver3();   /* test that nothing but the key is hashed */
1028  driver4();   /* test hashing multiple buffers (all buffers are null) */
1029  return 1;
1030}
1031
1032#endif  /* SELF_TEST */
1033