1/*===---- immintrin.h - Intel intrinsics -----------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#define __IMMINTRIN_H
12
13#include <x86gprintrin.h>
14
15#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
16    defined(__MMX__)
17#include <mmintrin.h>
18#endif
19
20#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
21    defined(__SSE__)
22#include <xmmintrin.h>
23#endif
24
25#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
26    defined(__SSE2__)
27#include <emmintrin.h>
28#endif
29
30#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
31    defined(__SSE3__)
32#include <pmmintrin.h>
33#endif
34
35#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
36    defined(__SSSE3__)
37#include <tmmintrin.h>
38#endif
39
40#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
41    (defined(__SSE4_2__) || defined(__SSE4_1__))
42#include <smmintrin.h>
43#endif
44
45#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
46    (defined(__AES__) || defined(__PCLMUL__))
47#include <wmmintrin.h>
48#endif
49
50#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
51    defined(__CLFLUSHOPT__)
52#include <clflushoptintrin.h>
53#endif
54
55#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
56    defined(__CLWB__)
57#include <clwbintrin.h>
58#endif
59
60#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
61    defined(__AVX__)
62#include <avxintrin.h>
63#endif
64
65#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
66    defined(__AVX2__)
67#include <avx2intrin.h>
68#endif
69
70#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
71    defined(__F16C__)
72#include <f16cintrin.h>
73#endif
74
75/* No feature check desired due to internal checks */
76#include <bmiintrin.h>
77
78#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
79    defined(__BMI2__)
80#include <bmi2intrin.h>
81#endif
82
83#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
84    defined(__LZCNT__)
85#include <lzcntintrin.h>
86#endif
87
88#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
89    defined(__POPCNT__)
90#include <popcntintrin.h>
91#endif
92
93#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
94    defined(__FMA__)
95#include <fmaintrin.h>
96#endif
97
98#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
99    defined(__AVX512F__)
100#include <avx512fintrin.h>
101#endif
102
103#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
104    defined(__AVX512VL__)
105#include <avx512vlintrin.h>
106#endif
107
108#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
109    defined(__AVX512BW__)
110#include <avx512bwintrin.h>
111#endif
112
113#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
114    defined(__AVX512BITALG__)
115#include <avx512bitalgintrin.h>
116#endif
117
118#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
119    defined(__AVX512CD__)
120#include <avx512cdintrin.h>
121#endif
122
123#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
124    defined(__AVX512VPOPCNTDQ__)
125#include <avx512vpopcntdqintrin.h>
126#endif
127
128#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
129    (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
130#include <avx512vpopcntdqvlintrin.h>
131#endif
132
133#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
134    defined(__AVX512VNNI__)
135#include <avx512vnniintrin.h>
136#endif
137
138#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
139    (defined(__AVX512VL__) && defined(__AVX512VNNI__))
140#include <avx512vlvnniintrin.h>
141#endif
142
143#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
144    defined(__AVXVNNI__)
145#include <avxvnniintrin.h>
146#endif
147
148#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
149    defined(__AVX512DQ__)
150#include <avx512dqintrin.h>
151#endif
152
153#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
154    (defined(__AVX512VL__) && defined(__AVX512BITALG__))
155#include <avx512vlbitalgintrin.h>
156#endif
157
158#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
159    (defined(__AVX512VL__) && defined(__AVX512BW__))
160#include <avx512vlbwintrin.h>
161#endif
162
163#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
164    (defined(__AVX512VL__) && defined(__AVX512CD__))
165#include <avx512vlcdintrin.h>
166#endif
167
168#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
169    (defined(__AVX512VL__) && defined(__AVX512DQ__))
170#include <avx512vldqintrin.h>
171#endif
172
173#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
174    defined(__AVX512ER__)
175#include <avx512erintrin.h>
176#endif
177
178#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
179    defined(__AVX512IFMA__)
180#include <avx512ifmaintrin.h>
181#endif
182
183#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
184    (defined(__AVX512IFMA__) && defined(__AVX512VL__))
185#include <avx512ifmavlintrin.h>
186#endif
187
188#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
189    defined(__AVX512VBMI__)
190#include <avx512vbmiintrin.h>
191#endif
192
193#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
194    (defined(__AVX512VBMI__) && defined(__AVX512VL__))
195#include <avx512vbmivlintrin.h>
196#endif
197
198#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
199    defined(__AVX512VBMI2__)
200#include <avx512vbmi2intrin.h>
201#endif
202
203#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
204    (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
205#include <avx512vlvbmi2intrin.h>
206#endif
207
208#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
209    defined(__AVX512PF__)
210#include <avx512pfintrin.h>
211#endif
212
213#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
214    defined(__AVX512BF16__)
215#include <avx512bf16intrin.h>
216#endif
217
218#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
219    (defined(__AVX512VL__) && defined(__AVX512BF16__))
220#include <avx512vlbf16intrin.h>
221#endif
222
223#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
224    defined(__PKU__)
225#include <pkuintrin.h>
226#endif
227
228#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
229    defined(__VPCLMULQDQ__)
230#include <vpclmulqdqintrin.h>
231#endif
232
233#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
234    defined(__VAES__)
235#include <vaesintrin.h>
236#endif
237
238#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
239    defined(__GFNI__)
240#include <gfniintrin.h>
241#endif
242
243#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
244    defined(__RDPID__)
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The 32-bit value produced by the RDPID builtin, i.e. the current
///    contents of IA32_TSC_AUX.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
  return __builtin_ia32_rdpid();
}
254#endif // __RDPID__
255
256#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
257    defined(__RDRND__)
/// Requests a 16-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    A pointer to the 16-bit location that receives the random value.
/// \returns The status reported by the builtin; per the Intel RDRAND
///    documentation this is 1 if a value was generated and 0 otherwise, so
///    callers should check it before using \c *__p.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  return __builtin_ia32_rdrand16_step(__p);
}
263
/// Requests a 32-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    A pointer to the 32-bit location that receives the random value.
/// \returns The status reported by the builtin; per the Intel RDRAND
///    documentation this is 1 if a value was generated and 0 otherwise, so
///    callers should check it before using \c *__p.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  return __builtin_ia32_rdrand32_step(__p);
}
269
270#ifdef __x86_64__
/// Requests a 64-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    A pointer to the 64-bit location that receives the random value.
/// \returns The status reported by the builtin; per the Intel RDRAND
///    documentation this is 1 if a value was generated and 0 otherwise, so
///    callers should check it before using \c *__p.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
  return __builtin_ia32_rdrand64_step(__p);
}
276#endif
277#endif /* __RDRND__ */
278
279#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
280    defined(__FSGSBASE__)
281#ifdef __x86_64__
/// Reads the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The lower 32 bits of the FS base register.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  return __builtin_ia32_rdfsbase32();
}
287
/// Reads the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The full 64-bit contents of the FS base register.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  return __builtin_ia32_rdfsbase64();
}
293
/// Reads the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The lower 32 bits of the GS base register.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  return __builtin_ia32_rdgsbase32();
}
299
/// Reads the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The full 64-bit contents of the GS base register.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  return __builtin_ia32_rdgsbase64();
}
305
/// Modifies the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the FS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrfsbase32(__V);
}
311
/// Modifies the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the FS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrfsbase64(__V);
}
317
/// Modifies the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the GS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrgsbase32(__V);
}
323
/// Modifies the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the GS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}
329
330#endif
331#endif /* __FSGSBASE__ */
332
333#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
334    defined(__MOVBE__)
335
/* The structs used below force the load/store to be unaligned, which is
 * accomplished with the __packed__ attribute. The __may_alias__ attribute
 * prevents TBAA metadata from being generated based on the struct and the
 * type of the field inside of it.
 */
341
/// Loads a 16-bit value from memory and returns it with its two bytes
/// swapped.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the 16-bit value to load. No alignment is required: the
///    access goes through a packed, may_alias struct.
/// \returns The loaded value with its byte order reversed.
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  /* The field is unsigned so that it matches the operand type of
   * __builtin_bswap16; the single signed conversion is the explicit cast on
   * the return value. */
  struct __loadu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
}
349
/// Swaps the two bytes of a 16-bit value and stores the result to memory.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the destination. No alignment is required: the access
///    goes through a packed, may_alias struct.
/// \param __D
///    The 16-bit value whose bytes are reversed before being stored.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  /* Unsigned field and explicit cast keep the conversion to the builtin's
   * unsigned operand visible instead of implicit. */
  struct __storeu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D);
}
357
/// Loads a 32-bit value from memory and returns it with its four bytes
/// reversed.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the 32-bit value to load. No alignment is required: the
///    access goes through a packed, may_alias struct.
/// \returns The loaded value with its byte order reversed.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  /* The field is unsigned so that it matches the operand type of
   * __builtin_bswap32; the single signed conversion is the explicit cast on
   * the return value. */
  struct __loadu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
}
365
/// Reverses the four bytes of a 32-bit value and stores the result to
/// memory.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the destination. No alignment is required: the access
///    goes through a packed, may_alias struct.
/// \param __D
///    The 32-bit value whose bytes are reversed before being stored.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  /* Unsigned field and explicit cast keep the conversion to the builtin's
   * unsigned operand visible instead of implicit. */
  struct __storeu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D);
}
373
374#ifdef __x86_64__
/// Loads a 64-bit value from memory and returns it with its eight bytes
/// reversed.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the 64-bit value to load. No alignment is required: the
///    access goes through a packed, may_alias struct.
/// \returns The loaded value with its byte order reversed.
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  /* The field is unsigned so that it matches the operand type of
   * __builtin_bswap64; the single signed conversion is the explicit cast on
   * the return value. */
  struct __loadu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
}
382
/// Reverses the eight bytes of a 64-bit value and stores the result to
/// memory.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the destination. No alignment is required: the access
///    goes through a packed, may_alias struct.
/// \param __D
///    The 64-bit value whose bytes are reversed before being stored.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  /* Unsigned field and explicit cast keep the conversion to the builtin's
   * unsigned operand visible instead of implicit. */
  struct __storeu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D);
}
390#endif
#endif /* __MOVBE__ */
392
393#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
394    defined(__RTM__)
395#include <rtmintrin.h>
396#include <xtestintrin.h>
397#endif
398
399#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
400    defined(__SHA__)
401#include <shaintrin.h>
402#endif
403
404#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
405    defined(__FXSR__)
406#include <fxsrintrin.h>
407#endif
408
409/* No feature check desired due to internal MSC_VER checks */
410#include <xsaveintrin.h>
411
412#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
413    defined(__XSAVEOPT__)
414#include <xsaveoptintrin.h>
415#endif
416
417#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
418    defined(__XSAVEC__)
419#include <xsavecintrin.h>
420#endif
421
422#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
423    defined(__XSAVES__)
424#include <xsavesintrin.h>
425#endif
426
427#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
428    defined(__SHSTK__)
429#include <cetintrin.h>
430#endif
431
/* Some intrinsics inside adxintrin.h are available only on processors with
 * ADX, whereas others are available at all times. */
434#include <adxintrin.h>
435
436#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
437    defined(__RDSEED__)
438#include <rdseedintrin.h>
439#endif
440
441#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
442    defined(__WBNOINVD__)
443#include <wbnoinvdintrin.h>
444#endif
445
446#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
447    defined(__CLDEMOTE__)
448#include <cldemoteintrin.h>
449#endif
450
451#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
452    defined(__WAITPKG__)
453#include <waitpkgintrin.h>
454#endif
455
456#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
457    defined(__MOVDIRI__) || defined(__MOVDIR64B__)
458#include <movdirintrin.h>
459#endif
460
461#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
462    defined(__PCONFIG__)
463#include <pconfigintrin.h>
464#endif
465
466#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
467    defined(__SGX__)
468#include <sgxintrin.h>
469#endif
470
471#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
472    defined(__PTWRITE__)
473#include <ptwriteintrin.h>
474#endif
475
476#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
477    defined(__INVPCID__)
478#include <invpcidintrin.h>
479#endif
480
481#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
482    defined(__KL__) || defined(__WIDEKL__)
483#include <keylockerintrin.h>
484#endif
485
486#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
487    defined(__AMXTILE__) || defined(__AMXINT8__) || defined(__AMXBF16__)
488#include <amxintrin.h>
489#endif
490
491#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
492    defined(__AVX512VP2INTERSECT__)
493#include <avx512vp2intersectintrin.h>
494#endif
495
496#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
497    (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
498#include <avx512vlvp2intersectintrin.h>
499#endif
500
501#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
502    defined(__ENQCMD__)
503#include <enqcmdintrin.h>
504#endif
505
506#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
507    defined(__SERIALIZE__)
508#include <serializeintrin.h>
509#endif
510
511#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
512    defined(__TSXLDTRK__)
513#include <tsxldtrkintrin.h>
514#endif
515
516#if defined(_MSC_VER) && __has_extension(gnu_asm)
517/* Define the default attributes for these intrinsics */
518#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
519#ifdef __cplusplus
520extern "C" {
521#endif
522/*----------------------------------------------------------------------------*\
523|* Interlocked Exchange HLE
524\*----------------------------------------------------------------------------*/
525#if defined(__i386__) || defined(__x86_64__)
526static __inline__ long __DEFAULT_FN_ATTRS
527_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
528  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
529                       : "+r" (_Value), "+m" (*_Target) :: "memory");
530  return _Value;
531}
532static __inline__ long __DEFAULT_FN_ATTRS
533_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
534  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
535                       : "+r" (_Value), "+m" (*_Target) :: "memory");
536  return _Value;
537}
538#endif
539#if defined(__x86_64__)
540static __inline__ __int64 __DEFAULT_FN_ATTRS
541_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
542  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
543                       : "+r" (_Value), "+m" (*_Target) :: "memory");
544  return _Value;
545}
546static __inline__ __int64 __DEFAULT_FN_ATTRS
547_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
548  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
549                       : "+r" (_Value), "+m" (*_Target) :: "memory");
550  return _Value;
551}
552#endif
553/*----------------------------------------------------------------------------*\
554|* Interlocked Compare Exchange HLE
555\*----------------------------------------------------------------------------*/
556#if defined(__i386__) || defined(__x86_64__)
557static __inline__ long __DEFAULT_FN_ATTRS
558_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
559                              long _Exchange, long _Comparand) {
560  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
561                       : "+a" (_Comparand), "+m" (*_Destination)
562                       : "r" (_Exchange) : "memory");
563  return _Comparand;
564}
565static __inline__ long __DEFAULT_FN_ATTRS
566_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
567                              long _Exchange, long _Comparand) {
568  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
569                       : "+a" (_Comparand), "+m" (*_Destination)
570                       : "r" (_Exchange) : "memory");
571  return _Comparand;
572}
573#endif
574#if defined(__x86_64__)
575static __inline__ __int64 __DEFAULT_FN_ATTRS
576_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
577                              __int64 _Exchange, __int64 _Comparand) {
578  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
579                       : "+a" (_Comparand), "+m" (*_Destination)
580                       : "r" (_Exchange) : "memory");
581  return _Comparand;
582}
583static __inline__ __int64 __DEFAULT_FN_ATTRS
584_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
585                              __int64 _Exchange, __int64 _Comparand) {
586  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
587                       : "+a" (_Comparand), "+m" (*_Destination)
588                       : "r" (_Exchange) : "memory");
589  return _Comparand;
590}
591#endif
592#ifdef __cplusplus
593}
594#endif
595
596#undef __DEFAULT_FN_ATTRS
597
598#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
599
600#endif /* __IMMINTRIN_H */
601