/* Copyright (c) 2016 Vladimir Makarov <vmakarov@gcc.gnu.org>

   Permission is hereby granted, free of charge, to any person
   obtaining a copy of this software and associated documentation
   files (the "Software"), to deal in the Software without
   restriction, including without limitation the rights to use, copy,
   modify, merge, publish, distribute, sublicense, and/or sell copies
   of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
*/

/* This file implements MUM (MUltiply and Mix) hashing.  We randomize
   input data by 64x64-bit multiplication and mixing hi- and low-parts
   of the multiplication result by using an addition and then mix it
   into the current state.  We use prime numbers randomly generated
   with the equal probability of their bit values for the
   multiplication.  When all primes are used once, the state is
   randomized and the same prime numbers are used again for data
   randomization.

   The MUM hashing passes all SMHasher tests.  A Pseudo Random Number
   Generator based on MUM also passes the NIST Statistical Test Suite
   for Random and Pseudorandom Number Generators for Cryptographic
   Applications (version 2.2.1) with 1000 bitstreams each containing
   1M bits.  MUM hashing is also faster than Spooky64 and City64 on
   small strings (at least up to 512-bit) on Haswell and Power7.  The
   MUM bulk speed (speed on very long data) is bigger than Spooky and
   City on Power7.  On Haswell the bulk speed is bigger than the
   Spooky one and close to the City speed.  */

43301333Sbapt#ifndef __MUM_HASH__
44301333Sbapt#define __MUM_HASH__
45301333Sbapt
46301333Sbapt#include <stddef.h>
47301333Sbapt#include <stdlib.h>
48301333Sbapt#include <string.h>
49301333Sbapt#include <limits.h>
50301333Sbapt
51301333Sbapt#ifdef _MSC_VER
52301333Sbapttypedef unsigned __int16 uint16_t;
53301333Sbapttypedef unsigned __int32 uint32_t;
54301333Sbapttypedef unsigned __int64 uint64_t;
55301333Sbapt#else
56301333Sbapt#include <stdint.h>
57301333Sbapt#endif
58301333Sbapt
/* Macro saying to use 128-bit integers implemented by GCC for some
   targets.  */
#ifndef _MUM_USE_INT128
/* In GCC uint128_t is defined if HOST_BITS_PER_WIDE_INT >= 64.
   HOST_WIDE_INT is long if HOST_BITS_PER_LONG > HOST_BITS_PER_INT,
   otherwise int. */
#if defined(__GNUC__) && UINT_MAX != ULONG_MAX
#define _MUM_USE_INT128 1
#else
#define _MUM_USE_INT128 0
#endif
#endif

/* Detection of a GCC new enough for the __optimize__/__target__
   attributes.  NOTE: deliberately disabled with `#if 0` (a downstream
   change), so _MUM_FRESH_GCC stays undefined and the attribute macros
   below expand to nothing.  */
#if 0
#if defined(__GNUC__) && ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 9) || (__GNUC__ > 4))
#define _MUM_FRESH_GCC
#endif
#endif

#if defined(__GNUC__) && !defined(__llvm__) && defined(_MUM_FRESH_GCC)
#define _MUM_ATTRIBUTE_UNUSED  __attribute__((unused))
#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
#define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
#else
#define _MUM_ATTRIBUTE_UNUSED
#define _MUM_OPTIMIZE(opts)
#define _MUM_TARGET(opts)
#endif

/* Here are different primes randomly generated with the equal
   probability of their bit values.  They are used to randomize input
   values.  NOTE: these are mutable globals -- mum_hash_randomize
   overwrites them, so hashes computed before and after a randomize
   call differ.  */
static uint64_t _mum_hash_step_prime = 0x2e0bb864e9ea7df5ULL;
static uint64_t _mum_key_step_prime = 0xcdb32970830fcaa1ULL;
static uint64_t _mum_block_start_prime = 0xc42b5e2e6480b23bULL;
static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL;
static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL;
static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL;
static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL;

/* One prime per unrolled word; 16 entries support an unroll factor of
   up to 1 << 4.  */
static uint64_t _mum_primes [] = {
  0x9ebdcae10d981691, 0x32b9b9b97a27ac7d, 0x29b5584d83d35bbd, 0x4b04e0e61401255f,
  0x25e8f7b1f1c9d027, 0x80d4c8c000f3e881, 0xbd1255431904b9dd, 0x8a3bd4485eee6d81,
  0x3bc721b2aad05197, 0x71b1a19b907d6e33, 0x525e6c1084a8534b, 0x9e4c2cd340c1299f,
  0xde3add92e94caa37, 0x7e14eadb1f65311d, 0x3f5aa40f89812853, 0x33b15a3b587d15c9,
};

/* Multiply 64-bit V and P and return sum of high and low parts of the
   128-bit result.  */
static inline uint64_t
_mum (uint64_t v, uint64_t p) {
  uint64_t hi, lo;
#if _MUM_USE_INT128
#if defined(__aarch64__)
  /* AARCH64 needs 2 insns to calculate 128-bit result of the
     multiplication.  If we use a generic code we actually call a
     function doing 128x128->128 bit multiplication.  The function is
     very slow.  */
  lo = v * p;   /* was `lo = v * p, hi;` -- the stray `, hi` read the
                   still-uninitialized variable for no effect */
  asm ("umulh %0, %1, %2" : "=r" (hi) : "r" (v), "r" (p));
#else
  __uint128_t r = (__uint128_t) v * (__uint128_t) p;
  hi = (uint64_t) (r >> 64);
  lo = (uint64_t) r;
#endif
#else
  /* Implementation of 64x64->128-bit multiplication by four 32x32->64
     bit multiplications.  */
  uint64_t hv = v >> 32, hp = p >> 32;
  uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
  uint64_t rh =  hv * hp;
  uint64_t rm_0 = hv * lp;
  uint64_t rm_1 = hp * lv;
  uint64_t rl =  lv * lp;
  uint64_t t, carry = 0;

  /* We could ignore a carry bit here if we did not care about the
     same hash for 32-bit and 64-bit targets.  */
  t = rl + (rm_0 << 32);
#ifdef MUM_TARGET_INDEPENDENT_HASH
  carry = t < rl;
#endif
  lo = t + (rm_1 << 32);
#ifdef MUM_TARGET_INDEPENDENT_HASH
  carry += lo < t;
#endif
  hi = rh + (rm_0 >> 32) + (rm_1 >> 32) + carry;
#endif
  /* We could use XOR here too but, for some reasons, on Haswell and
     Power7 using an addition improves hashing performance by 10% for
     small strings.  */
  return hi + lo;
}

/* Byte-swap helpers.  All users below call _mum_bswap32/_mum_bswap64
   (no underscore before the width) -- the MSVC and Apple branches
   previously defined _mum_bswap_32/_mum_bswap_64 and so never matched.
   The MSVC intrinsics are _byteswap_ulong (32-bit) and
   _byteswap_uint64 (64-bit); _byteswap_uint32_t/_byteswap_uint64_t do
   not exist.  */
#if defined(_MSC_VER)
#define _mum_bswap32(x) _byteswap_ulong (x)
#define _mum_bswap64(x) _byteswap_uint64 (x)
#elif defined(__APPLE__)
#include <libkern/OSByteOrder.h>
#define _mum_bswap32(x) OSSwapInt32 (x)
#define _mum_bswap64(x) OSSwapInt64 (x)
#elif defined(__GNUC__)
#define _mum_bswap32(x) __builtin_bswap32 (x)
#define _mum_bswap64(x) __builtin_bswap64 (x)
#else
#include <byteswap.h>
#define _mum_bswap32(x) bswap32 (x)
#define _mum_bswap64(x) bswap64 (x)
#endif

/* Return V converted to little-endian byte order.  This is a no-op on
   little-endian targets and whenever MUM_TARGET_INDEPENDENT_HASH is
   not defined (the native order is used for speed).  */
static inline uint64_t
_mum_le (uint64_t v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
  return v;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return _mum_bswap64 (v);
#else
#error "Unknown endianess"
#endif
}

/* 32-bit counterpart of _mum_le: return V in little-endian byte
   order (a no-op unless hashing must be target-independent on a
   big-endian machine).  */
static inline uint32_t
_mum_le32 (uint32_t v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
  return v;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return _mum_bswap32 (v);
#else
#error "Unknown endianess"
#endif
}

/* Macro defining how many times the most nested loop in
   _mum_hash_aligned will be unrolled by the compiler (although it can
   make its own decision:).  Use only a constant here to help a
   compiler to unroll a major loop.

   The macro value affects the result hash for strings > 128 bit.  The
   unroll factor greatly affects the hashing speed.  We prefer the
   speed.  */
#ifndef _MUM_UNROLL_FACTOR_POWER
#if defined(__PPC64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
#define _MUM_UNROLL_FACTOR_POWER 3
#elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
#define _MUM_UNROLL_FACTOR_POWER 4
#else
#define _MUM_UNROLL_FACTOR_POWER 2
#endif
#endif

/* _mum_primes has 16 entries, so the factor may not exceed 1 << 4.  */
#if _MUM_UNROLL_FACTOR_POWER < 1
#error "too small unroll factor"
#elif _MUM_UNROLL_FACTOR_POWER > 4
#error "We have not enough primes for such unroll factor"
#endif

#define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER)

218301333Sbaptstatic inline uint64_t _MUM_OPTIMIZE("unroll-loops")
219301333Sbapt_mum_hash_aligned (uint64_t start, const void *key, size_t len) {
220301333Sbapt  uint64_t result = start;
221301333Sbapt  const unsigned char *str = (const unsigned char *) key;
222301333Sbapt  uint64_t u64;
223301333Sbapt  int i;
224301333Sbapt  size_t n;
225301333Sbapt
226301333Sbapt  result = _mum (result, _mum_block_start_prime);
227301333Sbapt  while  (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) {
228301333Sbapt    /* This loop could be vectorized when we have vector insns for
229301333Sbapt       64x64->128-bit multiplication.  AVX2 currently only have a
230301333Sbapt       vector insn for 4 32x32->64-bit multiplication.  */
231301333Sbapt    for (i = 0; i < _MUM_UNROLL_FACTOR; i++)
232301333Sbapt      result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
233301333Sbapt    len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t);
234301333Sbapt    str += _MUM_UNROLL_FACTOR * sizeof (uint64_t);
235301333Sbapt    /* We will use the same prime numbers on the next iterations --
236301333Sbapt       randomize the state.  */
237301333Sbapt    result = _mum (result, _mum_unroll_prime);
238301333Sbapt  }
239301333Sbapt  n = len / sizeof (uint64_t);
240301333Sbapt  for (i = 0; i < (int)n; i++)
241301333Sbapt    result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
242301333Sbapt  len -= n * sizeof (uint64_t); str += n * sizeof (uint64_t);
243301333Sbapt  switch (len) {
244301333Sbapt  case 7:
245301333Sbapt    u64 = _mum_le32 (*(uint32_t *) str);
246301333Sbapt    u64 |= (uint64_t) str[4] << 32;
247301333Sbapt    u64 |= (uint64_t) str[5] << 40;
248301333Sbapt    u64 |= (uint64_t) str[6] << 48;
249301333Sbapt    return result ^ _mum (u64, _mum_tail_prime);
250301333Sbapt  case 6:
251301333Sbapt    u64 = _mum_le32 (*(uint32_t *) str);
252301333Sbapt    u64 |= (uint64_t) str[4] << 32;
253301333Sbapt    u64 |= (uint64_t) str[5] << 40;
254301333Sbapt    return result ^ _mum (u64, _mum_tail_prime);
255301333Sbapt  case 5:
256301333Sbapt    u64 = _mum_le32 (*(uint32_t *) str);
257301333Sbapt    u64 |= (uint64_t) str[4] << 32;
258301333Sbapt    return result ^ _mum (u64, _mum_tail_prime);
259301333Sbapt  case 4:
260301333Sbapt    u64 = _mum_le32 (*(uint32_t *) str);
261301333Sbapt    return result ^ _mum (u64, _mum_tail_prime);
262301333Sbapt  case 3:
263301333Sbapt    u64 = str[0];
264301333Sbapt    u64 |= (uint64_t) str[1] << 8;
265301333Sbapt    u64 |= (uint64_t) str[2] << 16;
266301333Sbapt    return result ^ _mum (u64, _mum_tail_prime);
267301333Sbapt  case 2:
268301333Sbapt    u64 = str[0];
269301333Sbapt    u64 |= (uint64_t) str[1] << 8;
270301333Sbapt    return result ^ _mum (u64, _mum_tail_prime);
271301333Sbapt  case 1:
272301333Sbapt    u64 = str[0];
273301333Sbapt    return result ^ _mum (u64, _mum_tail_prime);
274301333Sbapt  }
275301333Sbapt  return result;
276301333Sbapt}
277301333Sbapt
278301333Sbapt/* Final randomization of H.  */
279301333Sbaptstatic inline uint64_t
280301333Sbapt_mum_final (uint64_t h) {
281301333Sbapt  h ^= _mum (h, _mum_finish_prime1);
282301333Sbapt  h ^= _mum (h, _mum_finish_prime2);
283301333Sbapt  return h;
284301333Sbapt}
285301333Sbapt
#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)

/* We want to use AVX2 insn MULX instead of generic x86-64 MULQ where
   it is possible.  Although on modern Intel processors MULQ takes
   3-cycles vs. 4 for MULX, MULX permits more freedom in insn
   scheduling as it uses less fixed registers.  NOTE: with
   _MUM_FRESH_GCC disabled above, this whole section is compiled
   out.  */
static inline uint64_t _MUM_TARGET("arch=haswell")
_mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
  return _mum_final (_mum_hash_aligned (seed + len, key, len));
}
#endif

/* 1 when the target is known to tolerate unaligned word loads, so
   _mum_hash_default may read the input in place.  */
#ifndef _MUM_UNALIGNED_ACCESS
#if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \
    || defined(__s390__) || defined(__m32c__) || defined(cris)     \
    || defined(__CR16__) || defined(__vax__) || defined(__m68k__) \
    || defined(__aarch64__)
#define _MUM_UNALIGNED_ACCESS 1
#else
#define _MUM_UNALIGNED_ACCESS 0
#endif
#endif

/* When we need an aligned access to data being hashed we move part of
   the unaligned data to an aligned block of given size and then
   process it, repeating processing the data by the block.  */
#ifndef _MUM_BLOCK_LEN
#define _MUM_BLOCK_LEN 1024
#endif

#if _MUM_BLOCK_LEN < 8
#error "too small block length"
#endif

320301333Sbaptstatic inline uint64_t
321301333Sbapt#if defined(__x86_64__)
322301333Sbapt_MUM_TARGET("inline-all-stringops")
323301333Sbapt#endif
324301333Sbapt_mum_hash_default (const void *key, size_t len, uint64_t seed) {
325301333Sbapt  uint64_t result;
326301333Sbapt  const unsigned char *str = (const unsigned char *) key;
327301333Sbapt  size_t block_len;
328301333Sbapt  uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)];
329301333Sbapt
330301333Sbapt  result = seed + len;
331301333Sbapt  if (_MUM_UNALIGNED_ACCESS || ((size_t) str & 0x7) == 0)
332301333Sbapt    result = _mum_hash_aligned (result, key, len);
333301333Sbapt  else {
334301333Sbapt    while (len != 0) {
335301333Sbapt      block_len = len < _MUM_BLOCK_LEN ? len : _MUM_BLOCK_LEN;
336301333Sbapt      memmove (buf, str, block_len);
337301333Sbapt      result = _mum_hash_aligned (result, buf, block_len);
338301333Sbapt      len -= block_len;
339301333Sbapt      str += block_len;
340301333Sbapt    }
341301333Sbapt  }
342301333Sbapt  return _mum_final (result);
343301333Sbapt}
344301333Sbapt
/* Build a pseudo-random 64-bit factor from eight successive rand ()
   bytes.  The caller is expected to have seeded rand () (see
   mum_hash_randomize).  */
static inline uint64_t
_mum_next_factor (void) {
  uint64_t start = 0;
  int i;

  for (i = 0; i < 8; i++)
    start = (start << 8) | rand() % 256;
  return start;
}

/* ++++++++++++++++++++++++++ Interface functions: +++++++++++++++++++  */

357301333Sbapt/* Set random multiplicators depending on SEED.  */
358301333Sbaptstatic inline void
359301333Sbaptmum_hash_randomize (uint64_t seed) {
360301333Sbapt  int i;
361301333Sbapt
362301333Sbapt  srand (seed);
363301333Sbapt  _mum_hash_step_prime = _mum_next_factor ();
364301333Sbapt  _mum_key_step_prime = _mum_next_factor ();
365301333Sbapt  _mum_finish_prime1 = _mum_next_factor ();
366301333Sbapt  _mum_finish_prime2 = _mum_next_factor ();
367301333Sbapt  _mum_block_start_prime = _mum_next_factor ();
368301333Sbapt  _mum_unroll_prime = _mum_next_factor ();
369301333Sbapt  _mum_tail_prime = _mum_next_factor ();
370301333Sbapt  for (i = 0; i < (int)(sizeof (_mum_primes) / sizeof (uint64_t)); i++)
371301333Sbapt    _mum_primes[i] = _mum_next_factor ();
372301333Sbapt}
373301333Sbapt
/* Start hashing data with SEED.  Return the initial state.  */
static inline uint64_t
mum_hash_init (uint64_t seed) {
  return seed;
}

380301333Sbapt/* Process data KEY with the state H and return the updated state.  */
381301333Sbaptstatic inline uint64_t
382301333Sbaptmum_hash_step (uint64_t h, uint64_t key)
383301333Sbapt{
384301333Sbapt  return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime);
385301333Sbapt}
386301333Sbapt
/* Return the result of hashing using the current state H.  */
static inline uint64_t
mum_hash_finish (uint64_t h) {
  return _mum_final (h);
}

/* Fast hashing of KEY with SEED.  The hash is always the same for the
   same key on any target.  NOTE(review): the return type is size_t,
   so on a 32-bit target the 64-bit result is truncated -- callers
   wanting full cross-target identity should compare only the low
   32 bits; confirm against upstream intent.  */
static inline size_t
mum_hash64 (uint64_t key, uint64_t seed) {
  return mum_hash_finish (mum_hash_step (mum_hash_init (seed), key));
}

/* Hash data KEY of length LEN and SEED.  The hash depends on the
   target endianess and the unroll factor.  */
static inline uint64_t
mum_hash (const void *key, size_t len, uint64_t seed) {
#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
  /* Cached CPUID probe: 0 = not checked yet, 1 = AVX2 present,
     -1 = absent.  NOTE(review): the lazy init is not synchronized;
     concurrent first calls may probe twice, which is benign but
     worth confirming for the intended threading model.  */
  static int avx2_support = 0;

  if (avx2_support > 0)
    return _mum_hash_avx2 (key, len, seed);
  else if (! avx2_support) {
    __builtin_cpu_init ();
    avx2_support =  __builtin_cpu_supports ("avx2") ? 1 : -1;
    if (avx2_support > 0)
      return _mum_hash_avx2 (key, len, seed);
  }
#endif
  return _mum_hash_default (key, len, seed);
}

#endif /* #ifndef __MUM_HASH__ */