immintrin.h revision 355940
1/*===---- immintrin.h - Intel intrinsics -----------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#define __IMMINTRIN_H
12
13#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
14#include <mmintrin.h>
15#endif
16
17#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
18#include <xmmintrin.h>
19#endif
20
21#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
22#include <emmintrin.h>
23#endif
24
25#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
26#include <pmmintrin.h>
27#endif
28
29#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
30#include <tmmintrin.h>
31#endif
32
33#if !defined(_MSC_VER) || __has_feature(modules) || \
34    (defined(__SSE4_2__) || defined(__SSE4_1__))
35#include <smmintrin.h>
36#endif
37
38#if !defined(_MSC_VER) || __has_feature(modules) || \
39    (defined(__AES__) || defined(__PCLMUL__))
40#include <wmmintrin.h>
41#endif
42
43#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
44#include <clflushoptintrin.h>
45#endif
46
47#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
48#include <clwbintrin.h>
49#endif
50
51#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
52#include <avxintrin.h>
53#endif
54
55#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
56#include <avx2intrin.h>
57#endif
58
59#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
60#include <f16cintrin.h>
61#endif
62
63#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
64#include <vpclmulqdqintrin.h>
65#endif
66
67#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
68#include <bmiintrin.h>
69#endif
70
71#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
72#include <bmi2intrin.h>
73#endif
74
75#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
76#include <lzcntintrin.h>
77#endif
78
79#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
80#include <popcntintrin.h>
81#endif
82
83#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
84#include <fmaintrin.h>
85#endif
86
87#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
88#include <avx512fintrin.h>
89#endif
90
91#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
92#include <avx512vlintrin.h>
93#endif
94
95#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
96#include <avx512bwintrin.h>
97#endif
98
99#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
100#include <avx512bitalgintrin.h>
101#endif
102
103#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
104#include <avx512cdintrin.h>
105#endif
106
107#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
108#include <avx512vpopcntdqintrin.h>
109#endif
110
111#if !defined(_MSC_VER) || __has_feature(modules) || \
112    (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
113#include <avx512vpopcntdqvlintrin.h>
114#endif
115
116#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
117#include <avx512vnniintrin.h>
118#endif
119
120#if !defined(_MSC_VER) || __has_feature(modules) || \
121    (defined(__AVX512VL__) && defined(__AVX512VNNI__))
122#include <avx512vlvnniintrin.h>
123#endif
124
125#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
126#include <avx512dqintrin.h>
127#endif
128
129#if !defined(_MSC_VER) || __has_feature(modules) || \
130    (defined(__AVX512VL__) && defined(__AVX512BITALG__))
131#include <avx512vlbitalgintrin.h>
132#endif
133
134#if !defined(_MSC_VER) || __has_feature(modules) || \
135    (defined(__AVX512VL__) && defined(__AVX512BW__))
136#include <avx512vlbwintrin.h>
137#endif
138
139#if !defined(_MSC_VER) || __has_feature(modules) || \
140    (defined(__AVX512VL__) && defined(__AVX512CD__))
141#include <avx512vlcdintrin.h>
142#endif
143
144#if !defined(_MSC_VER) || __has_feature(modules) || \
145    (defined(__AVX512VL__) && defined(__AVX512DQ__))
146#include <avx512vldqintrin.h>
147#endif
148
149#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
150#include <avx512erintrin.h>
151#endif
152
153#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
154#include <avx512ifmaintrin.h>
155#endif
156
157#if !defined(_MSC_VER) || __has_feature(modules) || \
158    (defined(__AVX512IFMA__) && defined(__AVX512VL__))
159#include <avx512ifmavlintrin.h>
160#endif
161
162#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
163#include <avx512vbmiintrin.h>
164#endif
165
166#if !defined(_MSC_VER) || __has_feature(modules) || \
167    (defined(__AVX512VBMI__) && defined(__AVX512VL__))
168#include <avx512vbmivlintrin.h>
169#endif
170
171#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)
172#include <avx512vbmi2intrin.h>
173#endif
174
175#if !defined(_MSC_VER) || __has_feature(modules) || \
176    (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
177#include <avx512vlvbmi2intrin.h>
178#endif
179
180#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
181#include <avx512pfintrin.h>
182#endif
183
184#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BF16__)
185#include <avx512bf16intrin.h>
186#endif
187
188#if !defined(_MSC_VER) || __has_feature(modules) || \
189    (defined(__AVX512VL__) && defined(__AVX512BF16__))
190#include <avx512vlbf16intrin.h>
191#endif
192
193#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
194#include <pkuintrin.h>
195#endif
196
197#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
198#include <vaesintrin.h>
199#endif
200
201#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)
202#include <gfniintrin.h>
203#endif
204
205#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)
/// Reads the IA32_TSC_AUX MSR (0xc0000103) and hands its value back.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The 32-bit value produced by the RDPID builtin.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void)
{
  unsigned int __aux = __builtin_ia32_rdpid();
  return __aux;
}
215#endif // __RDPID__
216
217#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
/// Requests a 16-bit hardware random value and stores it through \a __p.
///
/// \param __p
///    Location that receives the generated value.
/// \returns The status reported by the RDRAND builtin (per Intel's intrinsics
///    guide: 1 when a random value was delivered, 0 otherwise).
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p) {
  int __status = __builtin_ia32_rdrand16_step(__p);
  return __status;
}
223
/// Requests a 32-bit hardware random value and stores it through \a __p.
///
/// \param __p
///    Location that receives the generated value.
/// \returns The status reported by the RDRAND builtin (per Intel's intrinsics
///    guide: 1 when a random value was delivered, 0 otherwise).
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p) {
  int __status = __builtin_ia32_rdrand32_step(__p);
  return __status;
}
229
230#ifdef __x86_64__
/// Requests a 64-bit hardware random value and stores it through \a __p.
///
/// \param __p
///    Location that receives the generated value.
/// \returns The status reported by the RDRAND builtin (per Intel's intrinsics
///    guide: 1 when a random value was delivered, 0 otherwise).
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p) {
  int __status = __builtin_ia32_rdrand64_step(__p);
  return __status;
}
236#endif
237#endif /* __RDRND__ */
238
239#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
240#ifdef __x86_64__
/// Reads the FS base register through the 32-bit RDFSBASE builtin.
///
/// \returns The value produced by __builtin_ia32_rdfsbase32.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void) {
  unsigned int __base = __builtin_ia32_rdfsbase32();
  return __base;
}
246
/// Reads the FS base register through the 64-bit RDFSBASE builtin.
///
/// \returns The value produced by __builtin_ia32_rdfsbase64.
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void) {
  unsigned long long __base = __builtin_ia32_rdfsbase64();
  return __base;
}
252
/// Reads the GS base register through the 32-bit RDGSBASE builtin.
///
/// \returns The value produced by __builtin_ia32_rdgsbase32.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void) {
  unsigned int __base = __builtin_ia32_rdgsbase32();
  return __base;
}
258
/// Reads the GS base register through the 64-bit RDGSBASE builtin.
///
/// \returns The value produced by __builtin_ia32_rdgsbase64.
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void) {
  unsigned long long __base = __builtin_ia32_rdgsbase64();
  return __base;
}
264
/// Writes \a __V to the FS base register through the 32-bit WRFSBASE builtin.
///
/// \param __V
///    Value handed to __builtin_ia32_wrfsbase32.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V) {
  __builtin_ia32_wrfsbase32(__V);
}
270
/// Writes \a __V to the FS base register through the 64-bit WRFSBASE builtin.
///
/// \param __V
///    Value handed to __builtin_ia32_wrfsbase64.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrfsbase64(__V);
}
276
/// Writes \a __V to the GS base register through the 32-bit WRGSBASE builtin.
///
/// \param __V
///    Value handed to __builtin_ia32_wrgsbase32.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V) {
  __builtin_ia32_wrgsbase32(__V);
}
282
/// Writes \a __V to the GS base register through the 64-bit WRGSBASE builtin.
///
/// \param __V
///    Value handed to __builtin_ia32_wrgsbase64.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrgsbase64(__V);
}
288
289#endif
290#endif /* __FSGSBASE__ */
291
292#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__)
293
294/* The structs used below are to force the load/store to be unaligned. This
295 * is accomplished with the __packed__ attribute. The __may_alias__ prevents
296 * tbaa metadata from being generated based on the struct and the type of the
297 * field inside of it.
298 */
299
/// Loads a 16-bit value from a possibly unaligned address and returns it with
/// its two bytes swapped.
///
/// \param __P
///    Address to read from; no alignment is required.
/// \returns The byte-swapped 16-bit value read from \a __P.
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  /* __packed__ forces an unaligned access; __may_alias__ keeps the wrapper
   * struct from constraining aliasing of the pointee. The field is unsigned
   * so it matches __builtin_bswap16's operand type without an implicit
   * sign conversion. */
  struct __loadu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  /* Keep the const qualifier of __P: this function only reads. */
  return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
}
307
/// Byte-swaps the 16-bit value \a __D and stores the result at a possibly
/// unaligned address.
///
/// \param __P
///    Address to write to; no alignment is required.
/// \param __D
///    Value whose bytes are swapped before the store.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D)
{
  /* Packed so the store is unaligned; may_alias so the wrapper type does not
   * influence alias analysis. */
  struct __be_store16 {
    short __val;
  } __attribute__((__packed__, __may_alias__));
  struct __be_store16 *__dst = (struct __be_store16 *)__P;
  __dst->__val = __builtin_bswap16(__D);
}
315
/// Loads a 32-bit value from a possibly unaligned address and returns it with
/// its byte order reversed.
///
/// \param __P
///    Address to read from; no alignment is required.
/// \returns The byte-swapped 32-bit value read from \a __P.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  /* __packed__ forces an unaligned access; __may_alias__ keeps the wrapper
   * struct from constraining aliasing of the pointee. The field is unsigned
   * so it matches __builtin_bswap32's operand type without an implicit
   * sign conversion. */
  struct __loadu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  /* Keep the const qualifier of __P: this function only reads. */
  return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
}
323
/// Reverses the byte order of the 32-bit value \a __D and stores the result
/// at a possibly unaligned address.
///
/// \param __P
///    Address to write to; no alignment is required.
/// \param __D
///    Value whose bytes are swapped before the store.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D)
{
  /* Packed so the store is unaligned; may_alias so the wrapper type does not
   * influence alias analysis. */
  struct __be_store32 {
    int __val;
  } __attribute__((__packed__, __may_alias__));
  struct __be_store32 *__dst = (struct __be_store32 *)__P;
  __dst->__val = __builtin_bswap32(__D);
}
331
332#ifdef __x86_64__
/// Loads a 64-bit value from a possibly unaligned address and returns it with
/// its byte order reversed.
///
/// \param __P
///    Address to read from; no alignment is required.
/// \returns The byte-swapped 64-bit value read from \a __P.
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  /* __packed__ forces an unaligned access; __may_alias__ keeps the wrapper
   * struct from constraining aliasing of the pointee. The field is unsigned
   * so it matches __builtin_bswap64's operand type without an implicit
   * sign conversion. */
  struct __loadu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  /* Keep the const qualifier of __P: this function only reads. */
  return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
}
340
/// Reverses the byte order of the 64-bit value \a __D and stores the result
/// at a possibly unaligned address.
///
/// \param __P
///    Address to write to; no alignment is required.
/// \param __D
///    Value whose bytes are swapped before the store.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D)
{
  /* Packed so the store is unaligned; may_alias so the wrapper type does not
   * influence alias analysis. */
  struct __be_store64 {
    long long __val;
  } __attribute__((__packed__, __may_alias__));
  struct __be_store64 *__dst = (struct __be_store64 *)__P;
  __dst->__val = __builtin_bswap64(__D);
}
348#endif
#endif /* __MOVBE__ */
350
351#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
352#include <rtmintrin.h>
353#include <xtestintrin.h>
354#endif
355
356#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
357#include <shaintrin.h>
358#endif
359
360#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
361#include <fxsrintrin.h>
362#endif
363
364/* No feature check desired due to internal MSC_VER checks */
365#include <xsaveintrin.h>
366
367#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
368#include <xsaveoptintrin.h>
369#endif
370
371#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
372#include <xsavecintrin.h>
373#endif
374
375#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
376#include <xsavesintrin.h>
377#endif
378
379#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
380#include <cetintrin.h>
381#endif
382
383/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
384 * whereas others are also available at all times. */
385#include <adxintrin.h>
386
387#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)
388#include <rdseedintrin.h>
389#endif
390
391#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)
392#include <wbnoinvdintrin.h>
393#endif
394
395#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__)
396#include <cldemoteintrin.h>
397#endif
398
399#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__)
400#include <waitpkgintrin.h>
401#endif
402
403#if !defined(_MSC_VER) || __has_feature(modules) || \
404  defined(__MOVDIRI__) || defined(__MOVDIR64B__)
405#include <movdirintrin.h>
406#endif
407
408#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__)
409#include <pconfigintrin.h>
410#endif
411
412#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__)
413#include <sgxintrin.h>
414#endif
415
416#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__)
417#include <ptwriteintrin.h>
418#endif
419
420#if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__)
421#include <invpcidintrin.h>
422#endif
423
424#if !defined(_MSC_VER) || __has_feature(modules) || \
425  defined(__AVX512VP2INTERSECT__)
426#include <avx512vp2intersectintrin.h>
427#endif
428
429#if !defined(_MSC_VER) || __has_feature(modules) || \
430  (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
431#include <avx512vlvp2intersectintrin.h>
432#endif
433
434#if !defined(_MSC_VER) || __has_feature(modules) || defined(__ENQCMD__)
435#include <enqcmdintrin.h>
436#endif
437
438#if defined(_MSC_VER) && __has_extension(gnu_asm)
439/* Define the default attributes for these intrinsics */
440#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
441#ifdef __cplusplus
442extern "C" {
443#endif
444/*----------------------------------------------------------------------------*\
445|* Interlocked Exchange HLE
446\*----------------------------------------------------------------------------*/
447#if defined(__i386__) || defined(__x86_64__)
448static __inline__ long __DEFAULT_FN_ATTRS
449_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
450  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
451                       : "+r" (_Value), "+m" (*_Target) :: "memory");
452  return _Value;
453}
454static __inline__ long __DEFAULT_FN_ATTRS
455_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
456  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
457                       : "+r" (_Value), "+m" (*_Target) :: "memory");
458  return _Value;
459}
460#endif
461#if defined(__x86_64__)
462static __inline__ __int64 __DEFAULT_FN_ATTRS
463_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
464  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
465                       : "+r" (_Value), "+m" (*_Target) :: "memory");
466  return _Value;
467}
468static __inline__ __int64 __DEFAULT_FN_ATTRS
469_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
470  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
471                       : "+r" (_Value), "+m" (*_Target) :: "memory");
472  return _Value;
473}
474#endif
475/*----------------------------------------------------------------------------*\
476|* Interlocked Compare Exchange HLE
477\*----------------------------------------------------------------------------*/
478#if defined(__i386__) || defined(__x86_64__)
479static __inline__ long __DEFAULT_FN_ATTRS
480_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
481                              long _Exchange, long _Comparand) {
482  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
483                       : "+a" (_Comparand), "+m" (*_Destination)
484                       : "r" (_Exchange) : "memory");
485  return _Comparand;
486}
487static __inline__ long __DEFAULT_FN_ATTRS
488_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
489                              long _Exchange, long _Comparand) {
490  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
491                       : "+a" (_Comparand), "+m" (*_Destination)
492                       : "r" (_Exchange) : "memory");
493  return _Comparand;
494}
495#endif
496#if defined(__x86_64__)
497static __inline__ __int64 __DEFAULT_FN_ATTRS
498_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
499                              __int64 _Exchange, __int64 _Comparand) {
500  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
501                       : "+a" (_Comparand), "+m" (*_Destination)
502                       : "r" (_Exchange) : "memory");
503  return _Comparand;
504}
505static __inline__ __int64 __DEFAULT_FN_ATTRS
506_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
507                              __int64 _Exchange, __int64 _Comparand) {
508  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
509                       : "+a" (_Comparand), "+m" (*_Destination)
510                       : "r" (_Exchange) : "memory");
511  return _Comparand;
512}
513#endif
514#ifdef __cplusplus
515}
516#endif
517
518#undef __DEFAULT_FN_ATTRS
519
520#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
521
522#endif /* __IMMINTRIN_H */
523