1/*===---- immintrin.h - Intel intrinsics -----------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#define __IMMINTRIN_H
12
13#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
14#include <mmintrin.h>
15#endif
16
17#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
18#include <xmmintrin.h>
19#endif
20
21#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
22#include <emmintrin.h>
23#endif
24
25#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
26#include <pmmintrin.h>
27#endif
28
29#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
30#include <tmmintrin.h>
31#endif
32
33#if !defined(_MSC_VER) || __has_feature(modules) || \
34    (defined(__SSE4_2__) || defined(__SSE4_1__))
35#include <smmintrin.h>
36#endif
37
38#if !defined(_MSC_VER) || __has_feature(modules) || \
39    (defined(__AES__) || defined(__PCLMUL__))
40#include <wmmintrin.h>
41#endif
42
43#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
44#include <clflushoptintrin.h>
45#endif
46
47#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
48#include <clwbintrin.h>
49#endif
50
51#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
52#include <avxintrin.h>
53#endif
54
55#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
56#include <avx2intrin.h>
57#endif
58
59#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
60#include <f16cintrin.h>
61#endif
62
63#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
64#include <vpclmulqdqintrin.h>
65#endif
66
67/* No feature check desired due to internal checks */
68#include <bmiintrin.h>
69
70#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
71#include <bmi2intrin.h>
72#endif
73
74#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
75#include <lzcntintrin.h>
76#endif
77
78#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
79#include <popcntintrin.h>
80#endif
81
82#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
83#include <fmaintrin.h>
84#endif
85
86#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
87#include <avx512fintrin.h>
88#endif
89
90#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
91#include <avx512vlintrin.h>
92#endif
93
94#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
95#include <avx512bwintrin.h>
96#endif
97
98#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
99#include <avx512bitalgintrin.h>
100#endif
101
102#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
103#include <avx512cdintrin.h>
104#endif
105
106#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
107#include <avx512vpopcntdqintrin.h>
108#endif
109
110#if !defined(_MSC_VER) || __has_feature(modules) || \
111    (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
112#include <avx512vpopcntdqvlintrin.h>
113#endif
114
115#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
116#include <avx512vnniintrin.h>
117#endif
118
119#if !defined(_MSC_VER) || __has_feature(modules) || \
120    (defined(__AVX512VL__) && defined(__AVX512VNNI__))
121#include <avx512vlvnniintrin.h>
122#endif
123
124#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
125#include <avx512dqintrin.h>
126#endif
127
128#if !defined(_MSC_VER) || __has_feature(modules) || \
129    (defined(__AVX512VL__) && defined(__AVX512BITALG__))
130#include <avx512vlbitalgintrin.h>
131#endif
132
133#if !defined(_MSC_VER) || __has_feature(modules) || \
134    (defined(__AVX512VL__) && defined(__AVX512BW__))
135#include <avx512vlbwintrin.h>
136#endif
137
138#if !defined(_MSC_VER) || __has_feature(modules) || \
139    (defined(__AVX512VL__) && defined(__AVX512CD__))
140#include <avx512vlcdintrin.h>
141#endif
142
143#if !defined(_MSC_VER) || __has_feature(modules) || \
144    (defined(__AVX512VL__) && defined(__AVX512DQ__))
145#include <avx512vldqintrin.h>
146#endif
147
148#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
149#include <avx512erintrin.h>
150#endif
151
152#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
153#include <avx512ifmaintrin.h>
154#endif
155
156#if !defined(_MSC_VER) || __has_feature(modules) || \
157    (defined(__AVX512IFMA__) && defined(__AVX512VL__))
158#include <avx512ifmavlintrin.h>
159#endif
160
161#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
162#include <avx512vbmiintrin.h>
163#endif
164
165#if !defined(_MSC_VER) || __has_feature(modules) || \
166    (defined(__AVX512VBMI__) && defined(__AVX512VL__))
167#include <avx512vbmivlintrin.h>
168#endif
169
170#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)
171#include <avx512vbmi2intrin.h>
172#endif
173
174#if !defined(_MSC_VER) || __has_feature(modules) || \
175    (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
176#include <avx512vlvbmi2intrin.h>
177#endif
178
179#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
180#include <avx512pfintrin.h>
181#endif
182
183#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BF16__)
184#include <avx512bf16intrin.h>
185#endif
186
187#if !defined(_MSC_VER) || __has_feature(modules) || \
188    (defined(__AVX512VL__) && defined(__AVX512BF16__))
189#include <avx512vlbf16intrin.h>
190#endif
191
192#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
193#include <pkuintrin.h>
194#endif
195
196#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
197#include <vaesintrin.h>
198#endif
199
200#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)
201#include <gfniintrin.h>
202#endif
203
204#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)
/// Reads the IA32_TSC_AUX MSR (0xc0000103) and hands its value back to the
/// caller.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The 32-bit value produced by the RDPID builtin.
static __inline__ unsigned int
    __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
    _rdpid_u32(void) {
  unsigned int const __aux = __builtin_ia32_rdpid();
  return __aux;
}
214#endif // __RDPID__
215
216#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
/// Requests a 16-bit value from the RDRAND builtin.
///
/// \headerfile <immintrin.h>
///
/// \param __p
///    Location forwarded to the builtin, which stores the generated value
///    through it.
/// \returns The status reported by the builtin (per Intel's RDRAND
///    documentation: 1 when a value was delivered, 0 otherwise).
static __inline__ int
    __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
    _rdrand16_step(unsigned short *__p) {
  int const __status = __builtin_ia32_rdrand16_step(__p);
  return __status;
}
222
/// Requests a 32-bit value from the RDRAND builtin.
///
/// \headerfile <immintrin.h>
///
/// \param __p
///    Location forwarded to the builtin, which stores the generated value
///    through it.
/// \returns The status reported by the builtin (per Intel's RDRAND
///    documentation: 1 when a value was delivered, 0 otherwise).
static __inline__ int
    __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
    _rdrand32_step(unsigned int *__p) {
  int const __status = __builtin_ia32_rdrand32_step(__p);
  return __status;
}
228
229#ifdef __x86_64__
/// Requests a 64-bit value from the RDRAND builtin.
///
/// \headerfile <immintrin.h>
///
/// \param __p
///    Location forwarded to the builtin, which stores the generated value
///    through it.
/// \returns The status reported by the builtin (per Intel's RDRAND
///    documentation: 1 when a value was delivered, 0 otherwise).
static __inline__ int
    __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
    _rdrand64_step(unsigned long long *__p) {
  int const __status = __builtin_ia32_rdrand64_step(__p);
  return __status;
}
235#endif
236#endif /* __RDRND__ */
237
238#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
239#ifdef __x86_64__
/// Returns the 32-bit result of the RDFSBASE builtin.
///
/// \headerfile <immintrin.h>
///
/// Requires the <c> fsgsbase </c> target feature.
static __inline__ unsigned int
    __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    _readfsbase_u32(void) {
  unsigned int const __base = __builtin_ia32_rdfsbase32();
  return __base;
}
245
/// Returns the 64-bit result of the RDFSBASE builtin.
///
/// \headerfile <immintrin.h>
///
/// Requires the <c> fsgsbase </c> target feature.
static __inline__ unsigned long long
    __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    _readfsbase_u64(void) {
  unsigned long long const __base = __builtin_ia32_rdfsbase64();
  return __base;
}
251
/// Returns the 32-bit result of the RDGSBASE builtin.
///
/// \headerfile <immintrin.h>
///
/// Requires the <c> fsgsbase </c> target feature.
static __inline__ unsigned int
    __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    _readgsbase_u32(void) {
  unsigned int const __base = __builtin_ia32_rdgsbase32();
  return __base;
}
257
/// Returns the 64-bit result of the RDGSBASE builtin.
///
/// \headerfile <immintrin.h>
///
/// Requires the <c> fsgsbase </c> target feature.
static __inline__ unsigned long long
    __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    _readgsbase_u64(void) {
  unsigned long long const __base = __builtin_ia32_rdgsbase64();
  return __base;
}
263
/// Passes a 32-bit value to the WRFSBASE builtin.
///
/// \headerfile <immintrin.h>
///
/// Requires the <c> fsgsbase </c> target feature.
///
/// \param __V
///    Value forwarded unchanged to the builtin.
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    _writefsbase_u32(unsigned int __V) {
  unsigned int const __base = __V;
  __builtin_ia32_wrfsbase32(__base);
}
269
/// Passes a 64-bit value to the WRFSBASE builtin.
///
/// \headerfile <immintrin.h>
///
/// Requires the <c> fsgsbase </c> target feature.
///
/// \param __V
///    Value forwarded unchanged to the builtin.
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    _writefsbase_u64(unsigned long long __V) {
  unsigned long long const __base = __V;
  __builtin_ia32_wrfsbase64(__base);
}
275
/// Passes a 32-bit value to the WRGSBASE builtin.
///
/// \headerfile <immintrin.h>
///
/// Requires the <c> fsgsbase </c> target feature.
///
/// \param __V
///    Value forwarded unchanged to the builtin.
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    _writegsbase_u32(unsigned int __V) {
  unsigned int const __base = __V;
  __builtin_ia32_wrgsbase32(__base);
}
281
/// Passes a 64-bit value to the WRGSBASE builtin.
///
/// \headerfile <immintrin.h>
///
/// Requires the <c> fsgsbase </c> target feature.
///
/// \param __V
///    Value forwarded unchanged to the builtin.
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    _writegsbase_u64(unsigned long long __V) {
  unsigned long long const __base = __V;
  __builtin_ia32_wrgsbase64(__base);
}
287
288#endif
289#endif /* __FSGSBASE__ */
290
291#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__)
292
293/* The structs used below are to force the load/store to be unaligned. This
294 * is accomplished with the __packed__ attribute. The __may_alias__ prevents
295 * tbaa metadata from being generated based on the struct and the type of the
296 * field inside of it.
297 */
298
/// Loads a 16-bit value from \a __P and returns it with its bytes reversed.
///
/// \headerfile <immintrin.h>
///
/// The read goes through a packed, may_alias wrapper struct so that \a __P
/// needs no particular alignment and no type-based aliasing assumptions are
/// attached to the access.
///
/// \param __P
///    Address of the two bytes to read.
/// \returns The byte-swapped 16-bit value.
static __inline__ short
    __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
    _loadbe_i16(void const *__P) {
  struct __be_src_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __be_src_i16 *__src = (const struct __be_src_i16 *)__P;
  short __raw = __src->__v;
  return __builtin_bswap16(__raw);
}
306
/// Stores the byte-reversed form of a 16-bit value at \a __P.
///
/// \headerfile <immintrin.h>
///
/// The write goes through a packed, may_alias wrapper struct so that \a __P
/// needs no particular alignment and no type-based aliasing assumptions are
/// attached to the access.
///
/// \param __P
///    Address that receives the two bytes.
/// \param __D
///    Value whose bytes are swapped before being stored.
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
    _storebe_i16(void *__P, short __D) {
  struct __be_dst_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  struct __be_dst_i16 *__dst = (struct __be_dst_i16 *)__P;
  __dst->__v = __builtin_bswap16(__D);
}
314
/// Loads a 32-bit value from \a __P and returns it with its bytes reversed.
///
/// \headerfile <immintrin.h>
///
/// The read goes through a packed, may_alias wrapper struct so that \a __P
/// needs no particular alignment and no type-based aliasing assumptions are
/// attached to the access.
///
/// \param __P
///    Address of the four bytes to read.
/// \returns The byte-swapped 32-bit value.
static __inline__ int
    __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
    _loadbe_i32(void const *__P) {
  struct __be_src_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __be_src_i32 *__src = (const struct __be_src_i32 *)__P;
  int __raw = __src->__v;
  return __builtin_bswap32(__raw);
}
322
/// Stores the byte-reversed form of a 32-bit value at \a __P.
///
/// \headerfile <immintrin.h>
///
/// The write goes through a packed, may_alias wrapper struct so that \a __P
/// needs no particular alignment and no type-based aliasing assumptions are
/// attached to the access.
///
/// \param __P
///    Address that receives the four bytes.
/// \param __D
///    Value whose bytes are swapped before being stored.
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
    _storebe_i32(void *__P, int __D) {
  struct __be_dst_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  struct __be_dst_i32 *__dst = (struct __be_dst_i32 *)__P;
  __dst->__v = __builtin_bswap32(__D);
}
330
331#ifdef __x86_64__
/// Loads a 64-bit value from \a __P and returns it with its bytes reversed.
///
/// \headerfile <immintrin.h>
///
/// The read goes through a packed, may_alias wrapper struct so that \a __P
/// needs no particular alignment and no type-based aliasing assumptions are
/// attached to the access.
///
/// \param __P
///    Address of the eight bytes to read.
/// \returns The byte-swapped 64-bit value.
static __inline__ long long
    __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
    _loadbe_i64(void const *__P) {
  struct __be_src_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __be_src_i64 *__src = (const struct __be_src_i64 *)__P;
  long long __raw = __src->__v;
  return __builtin_bswap64(__raw);
}
339
/// Stores the byte-reversed form of a 64-bit value at \a __P.
///
/// \headerfile <immintrin.h>
///
/// The write goes through a packed, may_alias wrapper struct so that \a __P
/// needs no particular alignment and no type-based aliasing assumptions are
/// attached to the access.
///
/// \param __P
///    Address that receives the eight bytes.
/// \param __D
///    Value whose bytes are swapped before being stored.
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
    _storebe_i64(void *__P, long long __D) {
  struct __be_dst_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  struct __be_dst_i64 *__dst = (struct __be_dst_i64 *)__P;
  __dst->__v = __builtin_bswap64(__D);
}
347#endif
#endif /* __MOVBE__ */
349
350#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
351#include <rtmintrin.h>
352#include <xtestintrin.h>
353#endif
354
355#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
356#include <shaintrin.h>
357#endif
358
359#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
360#include <fxsrintrin.h>
361#endif
362
363/* No feature check desired due to internal MSC_VER checks */
364#include <xsaveintrin.h>
365
366#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
367#include <xsaveoptintrin.h>
368#endif
369
370#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
371#include <xsavecintrin.h>
372#endif
373
374#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
375#include <xsavesintrin.h>
376#endif
377
378#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
379#include <cetintrin.h>
380#endif
381
382/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
383 * whereas others are also available at all times. */
384#include <adxintrin.h>
385
386#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)
387#include <rdseedintrin.h>
388#endif
389
390#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)
391#include <wbnoinvdintrin.h>
392#endif
393
394#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__)
395#include <cldemoteintrin.h>
396#endif
397
398#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__)
399#include <waitpkgintrin.h>
400#endif
401
402#if !defined(_MSC_VER) || __has_feature(modules) || \
403  defined(__MOVDIRI__) || defined(__MOVDIR64B__)
404#include <movdirintrin.h>
405#endif
406
407#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__)
408#include <pconfigintrin.h>
409#endif
410
411#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__)
412#include <sgxintrin.h>
413#endif
414
415#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__)
416#include <ptwriteintrin.h>
417#endif
418
419#if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__)
420#include <invpcidintrin.h>
421#endif
422
423#if !defined(_MSC_VER) || __has_feature(modules) || \
424  defined(__AVX512VP2INTERSECT__)
425#include <avx512vp2intersectintrin.h>
426#endif
427
428#if !defined(_MSC_VER) || __has_feature(modules) || \
429  (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
430#include <avx512vlvp2intersectintrin.h>
431#endif
432
433#if !defined(_MSC_VER) || __has_feature(modules) || defined(__ENQCMD__)
434#include <enqcmdintrin.h>
435#endif
436
437#if defined(_MSC_VER) && __has_extension(gnu_asm)
438/* Define the default attributes for these intrinsics */
439#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
440#ifdef __cplusplus
441extern "C" {
442#endif
443/*----------------------------------------------------------------------------*\
444|* Interlocked Exchange HLE
445\*----------------------------------------------------------------------------*/
446#if defined(__i386__) || defined(__x86_64__)
447static __inline__ long __DEFAULT_FN_ATTRS
448_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
449  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
450                       : "+r" (_Value), "+m" (*_Target) :: "memory");
451  return _Value;
452}
453static __inline__ long __DEFAULT_FN_ATTRS
454_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
455  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
456                       : "+r" (_Value), "+m" (*_Target) :: "memory");
457  return _Value;
458}
459#endif
460#if defined(__x86_64__)
461static __inline__ __int64 __DEFAULT_FN_ATTRS
462_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
463  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
464                       : "+r" (_Value), "+m" (*_Target) :: "memory");
465  return _Value;
466}
467static __inline__ __int64 __DEFAULT_FN_ATTRS
468_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
469  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
470                       : "+r" (_Value), "+m" (*_Target) :: "memory");
471  return _Value;
472}
473#endif
474/*----------------------------------------------------------------------------*\
475|* Interlocked Compare Exchange HLE
476\*----------------------------------------------------------------------------*/
477#if defined(__i386__) || defined(__x86_64__)
478static __inline__ long __DEFAULT_FN_ATTRS
479_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
480                              long _Exchange, long _Comparand) {
481  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
482                       : "+a" (_Comparand), "+m" (*_Destination)
483                       : "r" (_Exchange) : "memory");
484  return _Comparand;
485}
486static __inline__ long __DEFAULT_FN_ATTRS
487_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
488                              long _Exchange, long _Comparand) {
489  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
490                       : "+a" (_Comparand), "+m" (*_Destination)
491                       : "r" (_Exchange) : "memory");
492  return _Comparand;
493}
494#endif
495#if defined(__x86_64__)
496static __inline__ __int64 __DEFAULT_FN_ATTRS
497_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
498                              __int64 _Exchange, __int64 _Comparand) {
499  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
500                       : "+a" (_Comparand), "+m" (*_Destination)
501                       : "r" (_Exchange) : "memory");
502  return _Comparand;
503}
504static __inline__ __int64 __DEFAULT_FN_ATTRS
505_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
506                              __int64 _Exchange, __int64 _Comparand) {
507  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
508                       : "+a" (_Comparand), "+m" (*_Destination)
509                       : "r" (_Exchange) : "memory");
510  return _Comparand;
511}
512#endif
513#ifdef __cplusplus
514}
515#endif
516
517#undef __DEFAULT_FN_ATTRS
518
519#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
520
521#endif /* __IMMINTRIN_H */
522