/* mum.h -- FreeBSD vendor import of the MUM hash, upstream revision 301333.  */
/* Copyright (c) 2016 Vladimir Makarov <vmakarov@gcc.gnu.org>

   Permission is hereby granted, free of charge, to any person
   obtaining a copy of this software and associated documentation
   files (the "Software"), to deal in the Software without
   restriction, including without limitation the rights to use, copy,
   modify, merge, publish, distribute, sublicense, and/or sell copies
   of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
*/
23301333Sbapt
/* This file implements MUM (MUltiply and Mix) hashing.  We randomize
   input data by 64x64-bit multiplication and mixing hi- and low-parts
   of the multiplication result by using an addition and then mix it
   into the current state.  We use prime numbers randomly generated
   with the equal probability of their bit values for the
   multiplication.  When all primes are used once, the state is
   randomized and the same prime numbers are used again for data
   randomization.

   The MUM hashing passes all SMHasher tests.  A Pseudo Random Number
   Generator based on MUM also passes the NIST Statistical Test Suite
   for Random and Pseudorandom Number Generators for Cryptographic
   Applications (version 2.2.1) with 1000 bitstreams each containing
   1M bits.  MUM hashing is also faster than Spooky64 and City64 on
   small strings (at least up to 512 bits) on Haswell and Power7.  The
   MUM bulk speed (speed on very long data) is bigger than Spooky and
   City on Power7.  On Haswell the bulk speed is bigger than the
   Spooky one and close to the City speed.  */
42301333Sbapt
43301333Sbapt#ifndef __MUM_HASH__
44301333Sbapt#define __MUM_HASH__
45301333Sbapt
46301333Sbapt#include <stddef.h>
47301333Sbapt#include <stdlib.h>
48301333Sbapt#include <string.h>
49301333Sbapt#include <limits.h>
50301333Sbapt
51301333Sbapt#ifdef _MSC_VER
52301333Sbapttypedef unsigned __int16 uint16_t;
53301333Sbapttypedef unsigned __int32 uint32_t;
54301333Sbapttypedef unsigned __int64 uint64_t;
55301333Sbapt#else
56301333Sbapt#include <stdint.h>
57301333Sbapt#endif
58301333Sbapt
/* Macro saying to use 128-bit integers implemented by GCC for some
   targets.  */
#ifndef _MUM_USE_INT128
/* In GCC uint128_t is defined if HOST_BITS_PER_WIDE_INT >= 64.
   HOST_WIDE_INT is long if HOST_BITS_PER_LONG > HOST_BITS_PER_INT,
   otherwise int.  */
#if defined(__GNUC__) && UINT_MAX != ULONG_MAX
#define _MUM_USE_INT128 1
#else
#define _MUM_USE_INT128 0
#endif
#endif

/* GCC >= 4.9 supports the target attributes and CPU-detection
   builtins used further below.  */
#if defined(__GNUC__) && ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 9) || (__GNUC__ > 4))
#define _MUM_FRESH_GCC
#endif
75301333Sbapt
/* Wrappers for GCC-specific attributes; expand to nothing for other
   compilers (including clang, which does not support
   __attribute__((optimize))).  */
#if defined(__GNUC__) && !defined(__llvm__)
#define _MUM_ATTRIBUTE_UNUSED  __attribute__((unused))
#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
#define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
#else
#define _MUM_ATTRIBUTE_UNUSED
#define _MUM_OPTIMIZE(opts)
#define _MUM_TARGET(opts)
#endif
85301333Sbapt
86301333Sbapt
/* Here are different primes randomly generated with the equal
   probability of their bit values.  They are used to randomize input
   values.  NOTE(review): these are mutable file-scope statics in a
   header, so every translation unit gets its own copy --
   mum_hash_randomize affects only the calling TU; confirm that is
   intended.  */
static uint64_t _mum_hash_step_prime = 0x2e0bb864e9ea7df5ULL;
static uint64_t _mum_key_step_prime = 0xcdb32970830fcaa1ULL;
static uint64_t _mum_block_start_prime = 0xc42b5e2e6480b23bULL;
static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL;
static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL;
static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL;
static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL;

static uint64_t _mum_primes [] = {
  0x9ebdcae10d981691, 0x32b9b9b97a27ac7d, 0x29b5584d83d35bbd, 0x4b04e0e61401255f,
  0x25e8f7b1f1c9d027, 0x80d4c8c000f3e881, 0xbd1255431904b9dd, 0x8a3bd4485eee6d81,
  0x3bc721b2aad05197, 0x71b1a19b907d6e33, 0x525e6c1084a8534b, 0x9e4c2cd340c1299f,
  0xde3add92e94caa37, 0x7e14eadb1f65311d, 0x3f5aa40f89812853, 0x33b15a3b587d15c9,
};
104301333Sbapt
/* Multiply 64-bit V and P and return sum of high and low parts of the
   128-bit result.  */
static inline uint64_t
_mum (uint64_t v, uint64_t p) {
  uint64_t hi, lo;
#if _MUM_USE_INT128
#if defined(__aarch64__)
  /* AARCH64 needs 2 insns to calculate 128-bit result of the
     multiplication.  If we use a generic code we actually call a
     function doing 128x128->128 bit multiplication.  The function is
     very slow.  */
  lo = v * p;  /* fixed: was "lo = v * p, hi;" -- the stray ", hi"
                  read the still-uninitialized HI for no effect */
  asm ("umulh %0, %1, %2" : "=r" (hi) : "r" (v), "r" (p));
#else
  __uint128_t r = (__uint128_t) v * (__uint128_t) p;
  hi = (uint64_t) (r >> 64);
  lo = (uint64_t) r;
#endif
#else
  /* Implementation of 64x64->128-bit multiplication by four 32x32->64
     bit multiplications.  */
  uint64_t hv = v >> 32, hp = p >> 32;
  uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
  uint64_t rh =  hv * hp;
  uint64_t rm_0 = hv * lp;
  uint64_t rm_1 = hp * lv;
  uint64_t rl =  lv * lp;
  uint64_t t, carry = 0;

  /* We could ignore a carry bit here if we did not care about the
     same hash for 32-bit and 64-bit targets.  */
  t = rl + (rm_0 << 32);
#ifdef MUM_TARGET_INDEPENDENT_HASH
  carry = t < rl;
#endif
  lo = t + (rm_1 << 32);
#ifdef MUM_TARGET_INDEPENDENT_HASH
  carry += lo < t;
#endif
  hi = rh + (rm_0 >> 32) + (rm_1 >> 32) + carry;
#endif
  /* We could use XOR here too but, for some reasons, on Haswell and
     Power7 using an addition improves hashing performance by 10% for
     small strings.  */
  return hi + lo;
}
151301333Sbapt
/* Byte-swap helpers used by _mum_le/_mum_le32 on big-endian targets.
   Fixed: the MSVC and Apple branches previously defined
   _mum_bswap_32/_mum_bswap_64 (with an extra underscore) which never
   matched the _mum_bswap32/_mum_bswap64 use sites, and the MSVC
   branch named nonexistent intrinsics (_byteswap_uint32_t,
   _byteswap_uint64_t); the real MSVC intrinsics are _byteswap_ulong
   and _byteswap_uint64.  */
#if defined(_MSC_VER)
#define _mum_bswap32(x) _byteswap_ulong (x)
#define _mum_bswap64(x) _byteswap_uint64 (x)
#elif defined(__APPLE__)
#include <libkern/OSByteOrder.h>
#define _mum_bswap32(x) OSSwapInt32 (x)
#define _mum_bswap64(x) OSSwapInt64 (x)
#elif defined(__GNUC__)
#define _mum_bswap32(x) __builtin_bswap32 (x)
#define _mum_bswap64(x) __builtin_bswap64 (x)
#else
#include <byteswap.h>
#define _mum_bswap32(x) bswap32 (x)
#define _mum_bswap64(x) bswap64 (x)
#endif
167301333Sbapt
/* Return V as a little-endian 64-bit value: the identity on
   little-endian targets (or whenever MUM_TARGET_INDEPENDENT_HASH is
   not defined, trading target independence for speed), a byte swap on
   big-endian ones.  */
static inline uint64_t
_mum_le (uint64_t v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
  return v;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return _mum_bswap64 (v);
#else
#error "Unknown endianess"
#endif
}
178301333Sbapt
/* 32-bit variant of _mum_le: identity on little-endian targets (or
   when MUM_TARGET_INDEPENDENT_HASH is not defined), byte swap on
   big-endian ones.  */
static inline uint32_t
_mum_le32 (uint32_t v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
  return v;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return _mum_bswap32 (v);
#else
#error "Unknown endianess"
#endif
}
189301333Sbapt
/* Macro defining how many times the most nested loop in
   _mum_hash_aligned will be unrolled by the compiler (although it can
   make an own decision:).  Use only a constant here to help a
   compiler to unroll a major loop.

   The macro value affects the result hash for strings > 128 bit.  The
   unroll factor greatly affects the hashing speed.  We prefer the
   speed.  */
#ifndef _MUM_UNROLL_FACTOR_POWER
#if defined(__PPC64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
#define _MUM_UNROLL_FACTOR_POWER 3
#elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
#define _MUM_UNROLL_FACTOR_POWER 4
#else
#define _MUM_UNROLL_FACTOR_POWER 2
#endif
#endif

#if _MUM_UNROLL_FACTOR_POWER < 1
#error "too small unroll factor"
#elif _MUM_UNROLL_FACTOR_POWER > 4
#error "We have not enough primes for such unroll factor"
#endif

#define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER)
215301333Sbapt
/* Mix LEN bytes at KEY into state START and return the new state.
   KEY must be suitably aligned for 64-bit reads.  The data is
   consumed in _MUM_UNROLL_FACTOR 64-bit words at a time, then word by
   word, and finally the 1..7 tail bytes are gathered into one 64-bit
   value (low byte first) and mixed in with the tail prime.
   NOTE(review): the (uint64_t *) casts below read the byte buffer as
   64-bit words, which formally violates strict aliasing -- verify the
   project builds with flags that tolerate this (upstream relies on
   it).  */
static inline uint64_t _MUM_OPTIMIZE("unroll-loops")
_mum_hash_aligned (uint64_t start, const void *key, size_t len) {
  uint64_t result = start;
  const unsigned char *str = (const unsigned char *) key;
  uint64_t u64;
  int i;
  size_t n;

  result = _mum (result, _mum_block_start_prime);
  while  (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) {
    /* This loop could be vectorized when we have vector insns for
       64x64->128-bit multiplication.  AVX2 currently only have a
       vector insn for 4 32x32->64-bit multiplication.  */
    for (i = 0; i < _MUM_UNROLL_FACTOR; i++)
      result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
    len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t);
    str += _MUM_UNROLL_FACTOR * sizeof (uint64_t);
    /* We will use the same prime numbers on the next iterations --
       randomize the state.  */
    result = _mum (result, _mum_unroll_prime);
  }
  /* Process the remaining whole 64-bit words.  */
  n = len / sizeof (uint64_t);
  for (i = 0; i < (int)n; i++)
    result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
  len -= n * sizeof (uint64_t); str += n * sizeof (uint64_t);
  /* Gather the 1..7 tail bytes into U64, low byte first.  */
  switch (len) {
  case 7:
    u64 = _mum_le32 (*(uint32_t *) str);
    u64 |= (uint64_t) str[4] << 32;
    u64 |= (uint64_t) str[5] << 40;
    u64 |= (uint64_t) str[6] << 48;
    return result ^ _mum (u64, _mum_tail_prime);
  case 6:
    u64 = _mum_le32 (*(uint32_t *) str);
    u64 |= (uint64_t) str[4] << 32;
    u64 |= (uint64_t) str[5] << 40;
    return result ^ _mum (u64, _mum_tail_prime);
  case 5:
    u64 = _mum_le32 (*(uint32_t *) str);
    u64 |= (uint64_t) str[4] << 32;
    return result ^ _mum (u64, _mum_tail_prime);
  case 4:
    u64 = _mum_le32 (*(uint32_t *) str);
    return result ^ _mum (u64, _mum_tail_prime);
  case 3:
    u64 = str[0];
    u64 |= (uint64_t) str[1] << 8;
    u64 |= (uint64_t) str[2] << 16;
    return result ^ _mum (u64, _mum_tail_prime);
  case 2:
    u64 = str[0];
    u64 |= (uint64_t) str[1] << 8;
    return result ^ _mum (u64, _mum_tail_prime);
  case 1:
    u64 = str[0];
    return result ^ _mum (u64, _mum_tail_prime);
  }
  return result;
}
275301333Sbapt
276301333Sbapt/* Final randomization of H.  */
277301333Sbaptstatic inline uint64_t
278301333Sbapt_mum_final (uint64_t h) {
279301333Sbapt  h ^= _mum (h, _mum_finish_prime1);
280301333Sbapt  h ^= _mum (h, _mum_finish_prime2);
281301333Sbapt  return h;
282301333Sbapt}
283301333Sbapt
#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)

/* We want to use AVX2 insn MULX instead of generic x86-64 MULQ where
   it is possible.  Although on modern Intel processors MULQ takes
   3-cycles vs. 4 for MULX, MULX permits more freedom in insn
   scheduling as it uses less fixed registers.  */
static inline uint64_t _MUM_TARGET("arch=haswell")
_mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
  uint64_t state = _mum_hash_aligned (seed + len, key, len);

  return _mum_final (state);
}
#endif
295301333Sbapt
/* Nonzero when the target can read a 64-bit word at any byte address.
   May be overridden by pre-defining _MUM_UNALIGNED_ACCESS.  */
#ifndef _MUM_UNALIGNED_ACCESS
#if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \
    || defined(__s390__) || defined(__m32c__) || defined(cris)     \
    || defined(__CR16__) || defined(__vax__) || defined(__m68k__)  \
    || defined(__aarch64__)
#define _MUM_UNALIGNED_ACCESS 1
#else
#define _MUM_UNALIGNED_ACCESS 0
#endif
#endif

/* When we need an aligned access to data being hashed we move part of
   the unaligned data to an aligned block of given size and then
   process it, repeating processing the data by the block.  */
#ifndef _MUM_BLOCK_LEN
#define _MUM_BLOCK_LEN 1024
#endif

#if _MUM_BLOCK_LEN < 8
#error "too small block length"
#endif
317301333Sbapt
/* Hash LEN bytes at KEY with SEED.  If the target allows unaligned
   64-bit reads or KEY is 8-byte aligned, the data is hashed in place;
   otherwise it is copied in chunks of at most _MUM_BLOCK_LEN bytes
   into an aligned stack buffer first.  NOTE(review): for data longer
   than _MUM_BLOCK_LEN the block path mixes the state per chunk, so it
   yields a different hash than the single in-place pass would --
   presumably acceptable since alignment is a property of the caller's
   buffer; confirm.  */
static inline uint64_t
#if defined(__x86_64__)
_MUM_TARGET("inline-all-stringops")
#endif
_mum_hash_default (const void *key, size_t len, uint64_t seed) {
  uint64_t result;
  const unsigned char *str = (const unsigned char *) key;
  size_t block_len;
  uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)];

  result = seed + len;
  if (_MUM_UNALIGNED_ACCESS || ((size_t) str & 0x7) == 0)
    result = _mum_hash_aligned (result, key, len);
  else {
    while (len != 0) {
      /* Copy the next chunk into the 64-bit-aligned buffer.  */
      block_len = len < _MUM_BLOCK_LEN ? len : _MUM_BLOCK_LEN;
      memmove (buf, str, block_len);
      result = _mum_hash_aligned (result, buf, block_len);
      len -= block_len;
      str += block_len;
    }
  }
  return _mum_final (result);
}
342301333Sbapt
/* Build a pseudo-random 64-bit factor from eight successive rand ()
   bytes.  Used by mum_hash_randomize to replace the default
   multiplication primes.  */
static inline uint64_t
_mum_next_factor (void) {
  uint64_t factor = 0;
  int byte_num;

  for (byte_num = 0; byte_num < 8; byte_num++)
    factor = (factor << 8) | (uint64_t) (rand () % 256);
  return factor;
}
352301333Sbapt
353301333Sbapt/* ++++++++++++++++++++++++++ Interface functions: +++++++++++++++++++  */
354301333Sbapt
355301333Sbapt/* Set random multiplicators depending on SEED.  */
356301333Sbaptstatic inline void
357301333Sbaptmum_hash_randomize (uint64_t seed) {
358301333Sbapt  int i;
359301333Sbapt
360301333Sbapt  srand (seed);
361301333Sbapt  _mum_hash_step_prime = _mum_next_factor ();
362301333Sbapt  _mum_key_step_prime = _mum_next_factor ();
363301333Sbapt  _mum_finish_prime1 = _mum_next_factor ();
364301333Sbapt  _mum_finish_prime2 = _mum_next_factor ();
365301333Sbapt  _mum_block_start_prime = _mum_next_factor ();
366301333Sbapt  _mum_unroll_prime = _mum_next_factor ();
367301333Sbapt  _mum_tail_prime = _mum_next_factor ();
368301333Sbapt  for (i = 0; i < (int)(sizeof (_mum_primes) / sizeof (uint64_t)); i++)
369301333Sbapt    _mum_primes[i] = _mum_next_factor ();
370301333Sbapt}
371301333Sbapt
/* Start hashing data with SEED.  Return the initial state, which is
   simply the seed itself.  */
static inline uint64_t
mum_hash_init (uint64_t seed) {
  uint64_t state = seed;

  return state;
}
377301333Sbapt
378301333Sbapt/* Process data KEY with the state H and return the updated state.  */
379301333Sbaptstatic inline uint64_t
380301333Sbaptmum_hash_step (uint64_t h, uint64_t key)
381301333Sbapt{
382301333Sbapt  return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime);
383301333Sbapt}
384301333Sbapt
/* Return the result of hashing using the current state H by applying
   the final randomization rounds.  */
static inline uint64_t
mum_hash_finish (uint64_t h) {
  uint64_t hash = _mum_final (h);

  return hash;
}
390301333Sbapt
/* Fast hashing of a single 64-bit KEY with SEED.  The hash is always
   the same for the same key on any target.  NOTE(review): the result
   is returned as size_t, so it is narrowed on targets where size_t is
   32-bit -- confirm callers only need size_t width.  */
static inline size_t
mum_hash64 (uint64_t key, uint64_t seed) {
  uint64_t state = mum_hash_init (seed);

  state = mum_hash_step (state, key);
  return mum_hash_finish (state);
}
397301333Sbapt
398301333Sbapt/* Hash data KEY of length LEN and SEED.  The hash depends on the
399301333Sbapt   target endianess and the unroll factor.  */
400301333Sbaptstatic inline uint64_t
401301333Sbaptmum_hash (const void *key, size_t len, uint64_t seed) {
402301333Sbapt#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
403301333Sbapt  static int avx2_support = 0;
404301333Sbapt
405301333Sbapt  if (avx2_support > 0)
406301333Sbapt    return _mum_hash_avx2 (key, len, seed);
407301333Sbapt  else if (! avx2_support) {
408301333Sbapt    __builtin_cpu_init ();
409301333Sbapt    avx2_support =  __builtin_cpu_supports ("avx2") ? 1 : -1;
410301333Sbapt    if (avx2_support > 0)
411301333Sbapt      return _mum_hash_avx2 (key, len, seed);
412301333Sbapt  }
413301333Sbapt#endif
414301333Sbapt  return _mum_hash_default (key, len, seed);
415301333Sbapt}
416301333Sbapt
417301333Sbapt#endif
418