/* hash.c revision 219820 */
1114402Sru/* 2151497Sru * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3114402Sru * 4114402Sru * This software is available to you under a choice of one of two 5114402Sru * licenses. You may choose to be licensed under the terms of the GNU 6114402Sru * General Public License (GPL) Version 2, available from the file 7114402Sru * COPYING in the main directory of this source tree, or the 8114402Sru * OpenIB.org BSD license below: 9114402Sru * 10114402Sru * Redistribution and use in source and binary forms, with or 11114402Sru * without modification, are permitted provided that the following 12114402Sru * conditions are met: 13114402Sru * 14114402Sru * - Redistributions of source code must retain the above 15114402Sru * copyright notice, this list of conditions and the following 16114402Sru * disclaimer. 17114402Sru * 18114402Sru * - Redistributions in binary form must reproduce the above 19151497Sru * copyright notice, this list of conditions and the following 20114402Sru * disclaimer in the documentation and/or other materials 21114402Sru * provided with the distribution. 22114402Sru * 23114402Sru * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24114402Sru * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25114402Sru * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26114402Sru * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27114402Sru * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28114402Sru * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29114402Sru * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30151497Sru * SOFTWARE. 31114402Sru * 32151497Sru */ 33151497Sru 34114402Sru/* 35114402Sru * By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this 36114402Sru * code any way you wish, private, educational, or commercial. It's free. 
37114402Sru * 38114402Sru * See http://burtleburtle.net/bob/hash/evahash.html 39114402Sru * Use for hash table lookup, or anything where one collision in 2^^32 is 40114402Sru * acceptable. Do NOT use for cryptographic purposes. 41114402Sru */ 42151497Sru 43114402Sru#include <common.h> 44114402Sru 45151497Sru#define hashsize(n) ((uint32)1<<(n)) 46114402Sru#define hashmask(n) (hashsize(n)-1) 47114402Sru 48151497Sru 49151497Sru/* 50151497Sru-------------------------------------------------------------------- 51114402Srumix -- mix 3 32-bit values reversibly. 52151497SruFor every delta with one or two bits set, and the deltas of all three 53151497Sru high bits or all three low bits, whether the original value of a,b,c 54114402Sru is almost all zero or is uniformly distributed, 55151497Sru* If mix() is run forward or backward, at least 32 bits in a,b,c 56151497Sru have at least 1/4 probability of changing. 57151497Sru* If mix() is run forward, every bit of c will change between 1/3 and 58151497Sru 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.) 59151497Srumix() was built out of 36 single-cycle latency instructions in a 60151497Sru structure that could supported 2x parallelism, like so: 61151497Sru a -= b; 62151497Sru a -= c; x = (c>>13); 63114402Sru b -= c; a ^= x; 64114402Sru b -= a; x = (a<<8); 65114402Sru c -= a; b ^= x; 66151497Sru c -= b; x = (b>>13); 67114402Sru ... 68114402Sru Unfortunately, superscalar Pentiums and Sparcs can't take advantage 69114402Sru of that parallelism. They've also turned some of those single-cycle 70114402Sru latency instructions into multi-cycle latency instructions. Still, 71114402Sru this is the fastest good hash I could find. There were about 2^^68 72114402Sru to choose from. I only looked at a billion or so. 
/*
--------------------------------------------------------------------
mix(a, b, c) -- reversibly mix three 32-bit values in place.
  a, b, c : modifiable lvalues.  fhash() passes uint32_t variables, so
            the subtractions wrap modulo 2^32 and the right shifts are
            logical (zero-filling).
Each argument is evaluated many times, so it must not have side
effects.  The body is wrapped in do { } while (0) so that
"mix(a, b, c);" is exactly one statement and can be used as the
unbraced body of an if/else.
--------------------------------------------------------------------
*/
#define mix(a,b,c) \
do { \
	(a) -= (b); (a) -= (c); (a) ^= ((c)>>13); \
	(b) -= (c); (b) -= (a); (b) ^= ((a)<<8); \
	(c) -= (a); (c) -= (b); (c) ^= ((b)>>13); \
	(a) -= (b); (a) -= (c); (a) ^= ((c)>>12); \
	(b) -= (c); (b) -= (a); (b) ^= ((a)<<16); \
	(c) -= (a); (c) -= (b); (c) ^= ((b)>>5); \
	(a) -= (b); (a) -= (c); (a) ^= ((c)>>3); \
	(b) -= (c); (b) -= (a); (b) ^= ((a)<<10); \
	(c) -= (a); (c) -= (b); (c) ^= ((b)>>15); \
} while (0)

/*
--------------------------------------------------------------------
fhash() -- hash a variable-length key into a 32-bit value
  k       : the key (the unaligned variable-length array of bytes)
  len     : the length of the key, counting by bytes
  initval : can be any 4-byte value
Returns a 32-bit value.  Every bit of the key affects every bit of
the return value.  Every 1-bit and 2-bit delta achieves avalanche.
About 6*len+35 instructions.

The key is consumed 12 bytes at a time, each group of 4 bytes being
assembled least-significant byte first, so the result does not depend
on host byte order.  A negative length is treated as an empty key;
k is not dereferenced when there are no bytes to hash.

The best hash table sizes are powers of 2.  There is no need to do
mod a prime (mod is sooo slow!).  If you need less than 32 bits,
use a bitmask.  For example, if you need only 10 bits, do
  h = (h & hashmask(10));
In which case, the hash table should have hashsize(10) elements.

If you are hashing n strings (uint8_t **)k, do it like this:
  for (i=0, h=0; i<n; ++i) h = fhash( k[i], len[i], h);

--------------------------------------------------------------------
*/

uint32_t
fhash(uint8_t *k, int length, uint32_t initval)
{
	uint32_t a, b, c, len;

	/*
	 * A negative length can only be a caller bug.  Copied into the
	 * unsigned counter it would wrap to roughly 4G and the loop below
	 * would read far beyond the key, so hash it as an empty key.
	 */
	if (length < 0)
		length = 0;

	/* Set up the internal state */
	len = (uint32_t)length;
	a = b = 0x9e3779b9;	/* the golden ratio; an arbitrary value */
	c = initval;		/* the previous hash value */

	/* handle most of the key: whole 12-byte groups */
	while (len >= 12) {
		a += ((uint32_t)k[0] + ((uint32_t)k[1] << 8) +
		      ((uint32_t)k[2] << 16) + ((uint32_t)k[3] << 24));
		b += ((uint32_t)k[4] + ((uint32_t)k[5] << 8) +
		      ((uint32_t)k[6] << 16) + ((uint32_t)k[7] << 24));
		c += ((uint32_t)k[8] + ((uint32_t)k[9] << 8) +
		      ((uint32_t)k[10] << 16) + ((uint32_t)k[11] << 24));
		mix(a, b, c);
		k += 12;
		len -= 12;
	}

	/* handle the last 11 bytes; the total length goes into c's low byte */
	c += (uint32_t)length;
	switch (len) {		/* every case deliberately falls through */
	case 11:
		c += ((uint32_t)k[10] << 24);
		/* fallthrough */
	case 10:
		c += ((uint32_t)k[9] << 16);
		/* fallthrough */
	case 9:
		c += ((uint32_t)k[8] << 8);
		/* the first byte of c is reserved for the length */
		/* fallthrough */
	case 8:
		b += ((uint32_t)k[7] << 24);
		/* fallthrough */
	case 7:
		b += ((uint32_t)k[6] << 16);
		/* fallthrough */
	case 6:
		b += ((uint32_t)k[5] << 8);
		/* fallthrough */
	case 5:
		b += k[4];
		/* fallthrough */
	case 4:
		a += ((uint32_t)k[3] << 24);
		/* fallthrough */
	case 3:
		a += ((uint32_t)k[2] << 16);
		/* fallthrough */
	case 2:
		a += ((uint32_t)k[1] << 8);
		/* fallthrough */
	case 1:
		a += k[0];
		break;
	default:
		/* 0 bytes left: nothing to add */
		break;
	}

	mix(a, b, c);

	return c;
}