immintrin.h revision 344779
1/*===---- immintrin.h - Intel intrinsics -----------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24#ifndef __IMMINTRIN_H
25#define __IMMINTRIN_H
26
27#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
28#include <mmintrin.h>
29#endif
30
31#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
32#include <xmmintrin.h>
33#endif
34
35#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
36#include <emmintrin.h>
37#endif
38
39#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
40#include <pmmintrin.h>
41#endif
42
43#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
44#include <tmmintrin.h>
45#endif
46
47#if !defined(_MSC_VER) || __has_feature(modules) || \
48    (defined(__SSE4_2__) || defined(__SSE4_1__))
49#include <smmintrin.h>
50#endif
51
52#if !defined(_MSC_VER) || __has_feature(modules) || \
53    (defined(__AES__) || defined(__PCLMUL__))
54#include <wmmintrin.h>
55#endif
56
57#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
58#include <clflushoptintrin.h>
59#endif
60
61#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
62#include <clwbintrin.h>
63#endif
64
65#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
66#include <avxintrin.h>
67#endif
68
69#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
70#include <avx2intrin.h>
71#endif
72
73#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
74#include <f16cintrin.h>
75#endif
76
77#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
78#include <vpclmulqdqintrin.h>
79#endif
80
81#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
82#include <bmiintrin.h>
83#endif
84
85#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
86#include <bmi2intrin.h>
87#endif
88
89#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
90#include <lzcntintrin.h>
91#endif
92
93#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
94#include <popcntintrin.h>
95#endif
96
97#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
98#include <fmaintrin.h>
99#endif
100
101#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
102#include <avx512fintrin.h>
103#endif
104
105#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
106#include <avx512vlintrin.h>
107#endif
108
109#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
110#include <avx512bwintrin.h>
111#endif
112
113#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
114#include <avx512bitalgintrin.h>
115#endif
116
117#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
118#include <avx512cdintrin.h>
119#endif
120
121#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
122#include <avx512vpopcntdqintrin.h>
123#endif
124
125#if !defined(_MSC_VER) || __has_feature(modules) || \
126    (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
127#include <avx512vpopcntdqvlintrin.h>
128#endif
129
130#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
131#include <avx512vnniintrin.h>
132#endif
133
134#if !defined(_MSC_VER) || __has_feature(modules) || \
135    (defined(__AVX512VL__) && defined(__AVX512VNNI__))
136#include <avx512vlvnniintrin.h>
137#endif
138
139#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
140#include <avx512dqintrin.h>
141#endif
142
143#if !defined(_MSC_VER) || __has_feature(modules) || \
144    (defined(__AVX512VL__) && defined(__AVX512BITALG__))
145#include <avx512vlbitalgintrin.h>
146#endif
147
148#if !defined(_MSC_VER) || __has_feature(modules) || \
149    (defined(__AVX512VL__) && defined(__AVX512BW__))
150#include <avx512vlbwintrin.h>
151#endif
152
153#if !defined(_MSC_VER) || __has_feature(modules) || \
154    (defined(__AVX512VL__) && defined(__AVX512CD__))
155#include <avx512vlcdintrin.h>
156#endif
157
158#if !defined(_MSC_VER) || __has_feature(modules) || \
159    (defined(__AVX512VL__) && defined(__AVX512DQ__))
160#include <avx512vldqintrin.h>
161#endif
162
163#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
164#include <avx512erintrin.h>
165#endif
166
167#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
168#include <avx512ifmaintrin.h>
169#endif
170
171#if !defined(_MSC_VER) || __has_feature(modules) || \
172    (defined(__AVX512IFMA__) && defined(__AVX512VL__))
173#include <avx512ifmavlintrin.h>
174#endif
175
176#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
177#include <avx512vbmiintrin.h>
178#endif
179
180#if !defined(_MSC_VER) || __has_feature(modules) || \
181    (defined(__AVX512VBMI__) && defined(__AVX512VL__))
182#include <avx512vbmivlintrin.h>
183#endif
184
185#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)
186#include <avx512vbmi2intrin.h>
187#endif
188
189#if !defined(_MSC_VER) || __has_feature(modules) || \
190    (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
191#include <avx512vlvbmi2intrin.h>
192#endif
193
194#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
195#include <avx512pfintrin.h>
196#endif
197
198#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
199#include <pkuintrin.h>
200#endif
201
202#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
203#include <vaesintrin.h>
204#endif
205
206#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)
207#include <gfniintrin.h>
208#endif
209
210#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The unsigned 32-bit value produced by __builtin_ia32_rdpid().
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
  return __builtin_ia32_rdpid();
}
220#endif // __RDPID__
221
222#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
/// Requests a 16-bit hardware random number and stores it through \a __p.
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Destination for the generated 16-bit value.
/// \returns The status reported by __builtin_ia32_rdrand16_step (per the Intel
///    documentation: 1 if a random value was produced, 0 otherwise).
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  return __builtin_ia32_rdrand16_step(__p);
}
228
/// Requests a 32-bit hardware random number and stores it through \a __p.
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Destination for the generated 32-bit value.
/// \returns The status reported by __builtin_ia32_rdrand32_step (per the Intel
///    documentation: 1 if a random value was produced, 0 otherwise).
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  return __builtin_ia32_rdrand32_step(__p);
}
234
235#ifdef __x86_64__
/// Requests a 64-bit hardware random number and stores it through \a __p.
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Destination for the generated 64-bit value.
/// \returns The status reported by __builtin_ia32_rdrand64_step (per the Intel
///    documentation: 1 if a random value was produced, 0 otherwise).
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
  return __builtin_ia32_rdrand64_step(__p);
}
241#endif
242#endif /* __RDRND__ */
243
/// Returns the zero-based index of the least significant set bit of \a __A.
///
/// \param __A
///    The 32-bit value to scan. The result is undefined when \a __A is 0,
///    because __builtin_ctz is undefined for a zero argument.
/// \returns The number of trailing zero bits in \a __A.
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bit_scan_forward(int __A) {
  /* __builtin_ctz takes an unsigned int; make the conversion explicit. */
  return __builtin_ctz((unsigned int)__A);
}
249
/// Returns the zero-based index of the most significant set bit of \a __A.
///
/// \param __A
///    The 32-bit value to scan. The result is undefined when \a __A is 0,
///    because __builtin_clz is undefined for a zero argument.
/// \returns 31 minus the number of leading zero bits in \a __A.
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bit_scan_reverse(int __A) {
  /* __builtin_clz takes an unsigned int; make the conversion explicit. */
  return 31 - __builtin_clz((unsigned int)__A);
}
255
256#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
257#ifdef __x86_64__
/// Reads the FS base via __builtin_ia32_rdfsbase32 and returns it as a 32-bit
/// unsigned value.
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  return __builtin_ia32_rdfsbase32();
}
263
/// Reads the FS base via __builtin_ia32_rdfsbase64 and returns it as a 64-bit
/// unsigned value.
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  return __builtin_ia32_rdfsbase64();
}
269
/// Reads the GS base via __builtin_ia32_rdgsbase32 and returns it as a 32-bit
/// unsigned value.
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  return __builtin_ia32_rdgsbase32();
}
275
/// Reads the GS base via __builtin_ia32_rdgsbase64 and returns it as a 64-bit
/// unsigned value.
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  return __builtin_ia32_rdgsbase64();
}
281
/// Writes the 32-bit value \a __V to the FS base via
/// __builtin_ia32_wrfsbase32.
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    The new FS base value.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrfsbase32(__V);
}
287
/// Writes the 64-bit value \a __V to the FS base via
/// __builtin_ia32_wrfsbase64.
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    The new FS base value.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrfsbase64(__V);
}
293
/// Writes the 32-bit value \a __V to the GS base via
/// __builtin_ia32_wrgsbase32.
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    The new GS base value.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrgsbase32(__V);
}
299
/// Writes the 64-bit value \a __V to the GS base via
/// __builtin_ia32_wrgsbase64.
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    The new GS base value.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}
305
306#endif
307#endif /* __FSGSBASE__ */
308
309#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__)
310
311/* The structs used below are to force the load/store to be unaligned. This
312 * is accomplished with the __packed__ attribute. The __may_alias__ prevents
313 * tbaa metadata from being generated based on the struct and the type of the
314 * field inside of it.
315 */
316
/// Loads a 16-bit value from a possibly unaligned address and returns it with
/// its byte order reversed.
///
/// \param __P
///    Address of the 16-bit value to load; no alignment is required.
/// \returns The loaded value after __builtin_bswap16.
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  /* __packed__ forces an unaligned load; __may_alias__ keeps TBAA metadata
   * from being derived from this wrapper struct. */
  struct __loadu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  /* Preserve the const qualifier of __P rather than casting it away. */
  return __builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
}
324
/// Reverses the byte order of a 16-bit value and stores it to a possibly
/// unaligned address.
///
/// \param __P
///    Destination address; no alignment is required.
/// \param __D
///    The value to byte-swap and store.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  /* __packed__ forces an unaligned store; __may_alias__ keeps TBAA metadata
   * from being derived from this wrapper struct. */
  struct __storeu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D);
}
332
/// Loads a 32-bit value from a possibly unaligned address and returns it with
/// its byte order reversed.
///
/// \param __P
///    Address of the 32-bit value to load; no alignment is required.
/// \returns The loaded value after __builtin_bswap32.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  /* __packed__ forces an unaligned load; __may_alias__ keeps TBAA metadata
   * from being derived from this wrapper struct. */
  struct __loadu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  /* Preserve the const qualifier of __P rather than casting it away. */
  return __builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
}
340
/// Reverses the byte order of a 32-bit value and stores it to a possibly
/// unaligned address.
///
/// \param __P
///    Destination address; no alignment is required.
/// \param __D
///    The value to byte-swap and store.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  /* __packed__ forces an unaligned store; __may_alias__ keeps TBAA metadata
   * from being derived from this wrapper struct. */
  struct __storeu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D);
}
348
349#ifdef __x86_64__
/// Loads a 64-bit value from a possibly unaligned address and returns it with
/// its byte order reversed.
///
/// \param __P
///    Address of the 64-bit value to load; no alignment is required.
/// \returns The loaded value after __builtin_bswap64.
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  /* __packed__ forces an unaligned load; __may_alias__ keeps TBAA metadata
   * from being derived from this wrapper struct. */
  struct __loadu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  /* Preserve the const qualifier of __P rather than casting it away. */
  return __builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
}
357
/// Reverses the byte order of a 64-bit value and stores it to a possibly
/// unaligned address.
///
/// \param __P
///    Destination address; no alignment is required.
/// \param __D
///    The value to byte-swap and store.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  /* __packed__ forces an unaligned store; __may_alias__ keeps TBAA metadata
   * from being derived from this wrapper struct. */
  struct __storeu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D);
}
365#endif
#endif /* __MOVBE__ */
367
368#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
369#include <rtmintrin.h>
370#include <xtestintrin.h>
371#endif
372
373#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
374#include <shaintrin.h>
375#endif
376
377#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
378#include <fxsrintrin.h>
379#endif
380
381#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)
382#include <xsaveintrin.h>
383#endif
384
385#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
386#include <xsaveoptintrin.h>
387#endif
388
389#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
390#include <xsavecintrin.h>
391#endif
392
393#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
394#include <xsavesintrin.h>
395#endif
396
397#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
398#include <cetintrin.h>
399#endif
400
401/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
402 * whereas others are also available at all times. */
403#include <adxintrin.h>
404
405#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)
406#include <rdseedintrin.h>
407#endif
408
409#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)
410#include <wbnoinvdintrin.h>
411#endif
412
413#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__)
414#include <cldemoteintrin.h>
415#endif
416
417#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__)
418#include <waitpkgintrin.h>
419#endif
420
421#if !defined(_MSC_VER) || __has_feature(modules) || \
422  defined(__MOVDIRI__) || defined(__MOVDIR64B__)
423#include <movdirintrin.h>
424#endif
425
426#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__)
427#include <pconfigintrin.h>
428#endif
429
430#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__)
431#include <sgxintrin.h>
432#endif
433
434#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__)
435#include <ptwriteintrin.h>
436#endif
437
438#if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__)
439#include <invpcidintrin.h>
440#endif
441
442#ifdef _MSC_VER
443/* Define the default attributes for these intrinsics */
444#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
445#ifdef __cplusplus
446extern "C" {
447#endif
448/*----------------------------------------------------------------------------*\
449|* Interlocked Exchange HLE
450\*----------------------------------------------------------------------------*/
451#if defined(__i386__) || defined(__x86_64__)
452static __inline__ long __DEFAULT_FN_ATTRS
453_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
454  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
455                       : "+r" (_Value), "+m" (*_Target) :: "memory");
456  return _Value;
457}
458static __inline__ long __DEFAULT_FN_ATTRS
459_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
460  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
461                       : "+r" (_Value), "+m" (*_Target) :: "memory");
462  return _Value;
463}
464#endif
465#if defined(__x86_64__)
466static __inline__ __int64 __DEFAULT_FN_ATTRS
467_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
468  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
469                       : "+r" (_Value), "+m" (*_Target) :: "memory");
470  return _Value;
471}
472static __inline__ __int64 __DEFAULT_FN_ATTRS
473_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
474  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
475                       : "+r" (_Value), "+m" (*_Target) :: "memory");
476  return _Value;
477}
478#endif
479/*----------------------------------------------------------------------------*\
480|* Interlocked Compare Exchange HLE
481\*----------------------------------------------------------------------------*/
482#if defined(__i386__) || defined(__x86_64__)
483static __inline__ long __DEFAULT_FN_ATTRS
484_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
485                              long _Exchange, long _Comparand) {
486  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
487                       : "+a" (_Comparand), "+m" (*_Destination)
488                       : "r" (_Exchange) : "memory");
489  return _Comparand;
490}
491static __inline__ long __DEFAULT_FN_ATTRS
492_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
493                              long _Exchange, long _Comparand) {
494  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
495                       : "+a" (_Comparand), "+m" (*_Destination)
496                       : "r" (_Exchange) : "memory");
497  return _Comparand;
498}
499#endif
500#if defined(__x86_64__)
501static __inline__ __int64 __DEFAULT_FN_ATTRS
502_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
503                              __int64 _Exchange, __int64 _Comparand) {
504  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
505                       : "+a" (_Comparand), "+m" (*_Destination)
506                       : "r" (_Exchange) : "memory");
507  return _Comparand;
508}
509static __inline__ __int64 __DEFAULT_FN_ATTRS
510_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
511                              __int64 _Exchange, __int64 _Comparand) {
512  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
513                       : "+a" (_Comparand), "+m" (*_Destination)
514                       : "r" (_Exchange) : "memory");
515  return _Comparand;
516}
517#endif
518#ifdef __cplusplus
519}
520#endif
521
522#undef __DEFAULT_FN_ATTRS
523
524#endif /* _MSC_VER */
525
526#endif /* __IMMINTRIN_H */
527