xxhash.c revision 263032
/*
xxHash - Fast Hash algorithm
Copyright (C) 2012-2013, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

You can contact the author at :
- xxHash source repository : http://code.google.com/p/xxhash/
*/


//**************************************
// Tuning parameters
//**************************************
// Unaligned memory access is automatically enabled for "common" CPUs, such as x86.
// For other CPUs, the compiler will be more cautious and insert extra code to ensure aligned access is respected.
// If you know your target CPU supports unaligned memory access, you may want to force this option manually to improve performance.
// You can also enable this parameter if you know your input data will always be aligned (on boundaries of 4, for U32).
#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
# define XXH_USE_UNALIGNED_ACCESS 1
#endif

// XXH_ACCEPT_NULL_INPUT_POINTER :
// If the input pointer is a null pointer, xxHash's default behavior is to trigger a memory access error, since it is a bad pointer.
// When this option is enabled, xxHash's output for a null input pointer is the same as for a zero-length input.
// This option has a very small performance cost (only measurable on small inputs).
// By default, this option is disabled. To enable it, uncomment the define below :
// (A short call sketch appears just after this section.)
//#define XXH_ACCEPT_NULL_INPUT_POINTER 1

// XXH_FORCE_NATIVE_FORMAT :
// By default, the xxHash library provides endian-independent hash values, based on the little-endian convention.
// Results are therefore identical on little-endian and big-endian CPUs.
// This comes at a performance cost for big-endian CPUs, since some byte swapping is required to emulate the little-endian format.
// Should endian-independence be of no importance to your application, you may set the #define below to 1.
// It will improve speed for big-endian CPUs.
// This option has no impact on little-endian CPUs.
#define XXH_FORCE_NATIVE_FORMAT 0
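
// Call sketch for XXH_ACCEPT_NULL_INPUT_POINTER, restating the comments
// above ; the seed value 0 is purely illustrative :
//
//     XXH32(NULL, 0, 0);   // option disabled (default) : bad-pointer access
//                          // option enabled : same result as a zero-length
//                          //                  input, e.g. XXH32("", 0, 0)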


//**************************************
// Compiler Specific Options
//**************************************
// Disable some Visual warning messages
#ifdef _MSC_VER   // Visual Studio
#  pragma warning(disable : 4127)   // disable: C4127: conditional expression is constant
#endif

#ifdef _MSC_VER   // Visual Studio
#  define forceinline static __forceinline
#else
#  ifdef __GNUC__
#    define forceinline static inline __attribute__((always_inline))
#  else
#    define forceinline static inline
#  endif
#endif


//**************************************
// Includes & Memory related functions
//**************************************
#include "xxhash.h"
// Modify the local functions below should you wish to use some other memory related routines
// for malloc(), free()
#include <stdlib.h>
forceinline void* XXH_malloc(size_t s) { return malloc(s); }
forceinline void  XXH_free  (void* p)  { free(p); }
// for memcpy()
#include <string.h>
forceinline void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }


//**************************************
// Basic Types
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
#  include <stdint.h>
  typedef uint8_t  BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef  int32_t S32;
  typedef uint64_t U64;
#else
  typedef unsigned char      BYTE;
  typedef unsigned short     U16;
  typedef unsigned int       U32;
  typedef   signed int       S32;
  typedef unsigned long long U64;
#endif

#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS)
#  define _PACKED __attribute__ ((packed))
#else
#  define _PACKED
#endif

#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  ifdef __IBMC__
#    pragma pack(1)
#  else
#    pragma pack(push, 1)
#  endif
#endif

typedef struct _U32_S { U32 v; } _PACKED U32_S;

#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  pragma pack(pop)
#endif

#define A32(x) (((U32_S *)(x))->v)


//***************************************
// Compiler-specific Functions and Macros
//***************************************
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

// Note : although _rotl exists for minGW (GCC under windows), performance seems poor
#if defined(_MSC_VER)
#  define XXH_rotl32(x,r) _rotl(x,r)
#else
#  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
#endif

#if defined(_MSC_VER)   // Visual Studio
#  define XXH_swap32 _byteswap_ulong
#elif GCC_VERSION >= 403
#  define XXH_swap32 __builtin_bswap32
#else
static inline U32 XXH_swap32 (U32 x) {
    return  ((x << 24) & 0xff000000 ) |
            ((x <<  8) & 0x00ff0000 ) |
            ((x >>  8) & 0x0000ff00 ) |
            ((x >> 24) & 0x000000ff );}
#endif


//**************************************
// Constants
//**************************************
#define PRIME32_1   2654435761U
#define PRIME32_2   2246822519U
#define PRIME32_3   3266489917U
#define PRIME32_4    668265263U
#define PRIME32_5    374761393U


//**************************************
// Architecture Macros
//**************************************
typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
#ifndef XXH_CPU_LITTLE_ENDIAN   // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch
    static const int one = 1;
#   define XXH_CPU_LITTLE_ENDIAN   (*(char*)(&one))
#endif
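
// Two notes on the detection above, as a hedged sketch. At run time, `one`
// is stored in native byte order, so reading its first byte through a char*
// yields 1 on a little-endian CPU and 0 on a big-endian one :
//
//     static const int one = 1;
//     int littleEndian = *(const char*)&one;   // 1 on x86 ; 0 on e.g. SPARC
//
// Alternatively, the comment above allows defining the macro externally ;
// an illustrative (not repository-provided) compiler invocation :
//
//     cc -DXXH_CPU_LITTLE_ENDIAN=1 -c xxhash.c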


//**************************************
// Macros
//**************************************
#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(!!(c)) }; }   // use only *after* variable declarations


//****************************
// Memory reads
//****************************
typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;

forceinline U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align)
{
    if (align==XXH_unaligned)
        return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr));
    else
        return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr);
}

forceinline U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); }


//****************************
// Simple Hash Functions
//****************************
forceinline U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_endianess endian, XXH_alignment align)
{
    const BYTE* p = (const BYTE*)input;
    const BYTE* const bEnd = p + len;
    U32 h32;

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (p==NULL) { len=0; p=(const BYTE*)(size_t)16; }
#endif

    if (len>=16)
    {
        // bulk loop : consume 16-byte stripes, one 32-bit lane per accumulator
        const BYTE* const limit = bEnd - 16;
        U32 v1 = seed + PRIME32_1 + PRIME32_2;
        U32 v2 = seed + PRIME32_2;
        U32 v3 = seed + 0;
        U32 v4 = seed - PRIME32_1;

        do
        {
            v1 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
            v2 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
            v3 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
            v4 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
        } while (p<=limit);

        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
    }
    else
    {
        h32 = seed + PRIME32_5;
    }

    h32 += (U32) len;

    // consume the remaining 4-byte words, then the last 0-3 bytes
    while (p<=bEnd-4)
    {
        h32 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_3;
        h32  = XXH_rotl32(h32, 17) * PRIME32_4;
        p+=4;
    }

    while (p<bEnd)
    {
        h32 += (*p) * PRIME32_5;
        h32 = XXH_rotl32(h32, 11) * PRIME32_1;
        p++;
    }

    // final avalanche : spread the influence of every input bit across h32
    h32 ^= h32 >> 15;
    h32 *= PRIME32_2;
    h32 ^= h32 >> 13;
    h32 *= PRIME32_3;
    h32 ^= h32 >> 16;

    return h32;
}


U32 XXH32(const void* input, int len, U32 seed)
{
#if 0
    // Simple version, good for code maintenance, but unfortunately slow for small inputs
    void* state = XXH32_init(seed);
    XXH32_update(state, input, len);
    return XXH32_digest(state);
#else
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

# if !defined(XXH_USE_UNALIGNED_ACCESS)
    if (!(((size_t)input) & 3))   // Input is aligned, let's leverage the speed advantage
    {
        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
            return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
        else
            return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
    }
# endif

    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
    else
        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
#endif
}
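
// Minimal usage sketch for the one-shot interface above, disabled with the
// same #if 0 convention used inside XXH32(). The function name, buffer and
// seed are illustration values, not part of the library.
#if 0
#include <stdio.h>
static void XXH32_exampleOneShot(void)
{
    const char data[] = "xxHash sample input";
    U32 h;

    h = XXH32(data, (int)(sizeof(data)-1), 0);   // hash the bytes, minus the trailing '\0' ; seed = 0
    printf("XXH32 = %08X\n", (unsigned)h);
}
#endif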


//****************************
// Advanced Hash Functions
//****************************

struct XXH_state32_t
{
    U64 total_len;
    U32 seed;
    U32 v1;
    U32 v2;
    U32 v3;
    U32 v4;
    int memsize;
    char memory[16];
};


int XXH32_sizeofState(void)
{
    XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t));   // A compilation error here means XXH32_SIZEOFSTATE is not large enough
    return sizeof(struct XXH_state32_t);
}


XXH_errorcode XXH32_resetState(void* state_in, U32 seed)
{
    struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
    state->seed = seed;
    state->v1 = seed + PRIME32_1 + PRIME32_2;
    state->v2 = seed + PRIME32_2;
    state->v3 = seed + 0;
    state->v4 = seed - PRIME32_1;
    state->total_len = 0;
    state->memsize = 0;
    return XXH_OK;
}


void* XXH32_init (U32 seed)
{
    void* state = XXH_malloc (sizeof(struct XXH_state32_t));
    XXH32_resetState(state, seed);
    return state;
}


forceinline XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian)
{
    struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
    const BYTE* p = (const BYTE*)input;
    const BYTE* const bEnd = p + len;

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (input==NULL) return XXH_ERROR;
#endif

    state->total_len += len;

    if (state->memsize + len < 16)   // fill in tmp buffer
    {
        XXH_memcpy(state->memory + state->memsize, input, len);
        state->memsize += len;
        return XXH_OK;
    }

    if (state->memsize)   // some data left from previous update
    {
        XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize);
        {
            const U32* p32 = (const U32*)state->memory;
            state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++;
            state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++;
            state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++;
            state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++;
        }
        p += 16-state->memsize;
        state->memsize = 0;
    }

    if (p <= bEnd-16)
    {
        const BYTE* const limit = bEnd - 16;
        U32 v1 = state->v1;
        U32 v2 = state->v2;
        U32 v3 = state->v3;
        U32 v4 = state->v4;

        do
        {
            v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
            v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
            v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
            v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
        } while (p<=limit);

        state->v1 = v1;
        state->v2 = v2;
        state->v3 = v3;
        state->v4 = v4;
    }

    if (p < bEnd)
    {
        XXH_memcpy(state->memory, p, bEnd-p);
        state->memsize = (int)(bEnd-p);
    }

    return XXH_OK;
}

XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
{
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
    else
        return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
}
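
// Property worth noting about XXH32_update() : the 16-byte buffering above
// makes the result independent of how the input is split across calls.
// A hedged sketch, where buf is any readable 20-byte buffer :
//
//     XXH32_update(state, buf, 20);           // one call ...
//
//     XXH32_update(state, buf,      7);       // ... hashes the same bytes as
//     XXH32_update(state, buf + 7, 13);       //     two calls over the split input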


forceinline U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian)
{
    struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
    const BYTE * p = (const BYTE*)state->memory;
    BYTE* bEnd = (BYTE*)state->memory + state->memsize;
    U32 h32;

    if (state->total_len >= 16)
    {
        h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
    }
    else
    {
        h32 = state->seed + PRIME32_5;
    }

    h32 += (U32) state->total_len;

    while (p<=bEnd-4)
    {
        h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3;
        h32  = XXH_rotl32(h32, 17) * PRIME32_4;
        p+=4;
    }

    while (p<bEnd)
    {
        h32 += (*p) * PRIME32_5;
        h32 = XXH_rotl32(h32, 11) * PRIME32_1;
        p++;
    }

    h32 ^= h32 >> 15;
    h32 *= PRIME32_2;
    h32 ^= h32 >> 13;
    h32 *= PRIME32_3;
    h32 ^= h32 >> 16;

    return h32;
}


U32 XXH32_intermediateDigest (void* state_in)
{
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian);
    else
        return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian);
}


U32 XXH32_digest (void* state_in)
{
    U32 h32 = XXH32_intermediateDigest(state_in);

    XXH_free(state_in);

    return h32;
}
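
// Minimal usage sketch for the streaming interface, disabled like the example
// blocks above. Names and values are illustrative. Note the asymmetry :
// XXH32_digest() frees the state allocated by XXH32_init(), while
// XXH32_intermediateDigest() leaves it valid for further updates.
#if 0
#include <stdio.h>
static void XXH32_exampleStreaming(void)
{
    const char part1[] = "xxHash ";
    const char part2[] = "streaming input";
    void* state;
    U32 h;

    state = XXH32_init(0);                                 // allocate the state, seed = 0
    XXH32_update(state, part1, (int)(sizeof(part1)-1));    // feed data chunk by chunk
    XXH32_update(state, part2, (int)(sizeof(part2)-1));
    h = XXH32_digest(state);                               // final hash ; also frees the state
    printf("XXH32 = %08X\n", (unsigned)h);
}
#endif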