/* immintrin.h revision 344779 */
/*===---- immintrin.h - Intel intrinsics -----------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#define __IMMINTRIN_H

/* Umbrella header: pulls in the per-feature intrinsic headers below.
 *
 * Every include is guarded by the same three-way condition:
 *   - _MSC_VER is not defined                      -> always include, or
 *   - __has_feature(modules) is true               -> always include, or
 *   - the matching feature macro(s) are defined    -> include.
 * In other words, the feature macros only restrict what is included when
 * _MSC_VER is defined and modules are not enabled.
 *
 * The includes are kept in their original order; do not reorder them without
 * checking the dependencies between the sub-headers.
 */

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
#include <mmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
#include <xmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
#include <emmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
#include <pmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
#include <tmmintrin.h>
#endif

/* Either of the two SSE4 feature macros is enough to pull in smmintrin.h. */
#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__SSE4_2__) || defined(__SSE4_1__))
#include <smmintrin.h>
#endif

/* Either __AES__ or __PCLMUL__ is enough to pull in wmmintrin.h. */
#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AES__) || defined(__PCLMUL__))
#include <wmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
#include <clflushoptintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
#include <clwbintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
#include <avxintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
#include <avx2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
#include <f16cintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
#include <vpclmulqdqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
#include <bmiintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
#include <bmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
#include <lzcntintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
#include <popcntintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
#include <fmaintrin.h>
#endif

/* AVX-512 family. Headers whose name combines "vl" with another extension
 * require BOTH feature macros (&&) in the feature-macro arm of the guard. */
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
#include <avx512fintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
#include <avx512vlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
#include <avx512bwintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
#include <avx512bitalgintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
#include <avx512cdintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
#include <avx512vpopcntdqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
#include <avx512vpopcntdqvlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
#include <avx512vnniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VNNI__))
#include <avx512vlvnniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
#include <avx512dqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BITALG__))
#include <avx512vlbitalgintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BW__))
#include <avx512vlbwintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512CD__))
#include <avx512vlcdintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512DQ__))
#include <avx512vldqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
#include <avx512erintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
#include <avx512ifmaintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512IFMA__) && defined(__AVX512VL__))
#include <avx512ifmavlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
#include <avx512vbmiintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VBMI__) && defined(__AVX512VL__))
#include <avx512vbmivlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)
#include <avx512vbmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
#include <avx512vlvbmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
#include <avx512pfintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
#include <pkuintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
#include <vaesintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)
#include <gfniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
216static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid"))) 217_rdpid_u32(void) { 218 return __builtin_ia32_rdpid(); 219} 220#endif // __RDPID__ 221 222#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__) 223static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 224_rdrand16_step(unsigned short *__p) 225{ 226 return __builtin_ia32_rdrand16_step(__p); 227} 228 229static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 230_rdrand32_step(unsigned int *__p) 231{ 232 return __builtin_ia32_rdrand32_step(__p); 233} 234 235#ifdef __x86_64__ 236static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 237_rdrand64_step(unsigned long long *__p) 238{ 239 return __builtin_ia32_rdrand64_step(__p); 240} 241#endif 242#endif /* __RDRND__ */ 243 244/* __bit_scan_forward */ 245static __inline__ int __attribute__((__always_inline__, __nodebug__)) 246_bit_scan_forward(int __A) { 247 return __builtin_ctz(__A); 248} 249 250/* __bit_scan_reverse */ 251static __inline__ int __attribute__((__always_inline__, __nodebug__)) 252_bit_scan_reverse(int __A) { 253 return 31 - __builtin_clz(__A); 254} 255 256#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__) 257#ifdef __x86_64__ 258static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 259_readfsbase_u32(void) 260{ 261 return __builtin_ia32_rdfsbase32(); 262} 263 264static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 265_readfsbase_u64(void) 266{ 267 return __builtin_ia32_rdfsbase64(); 268} 269 270static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 271_readgsbase_u32(void) 272{ 273 return __builtin_ia32_rdgsbase32(); 274} 275 276static __inline__ unsigned long long __attribute__((__always_inline__, 
__nodebug__, __target__("fsgsbase"))) 277_readgsbase_u64(void) 278{ 279 return __builtin_ia32_rdgsbase64(); 280} 281 282static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 283_writefsbase_u32(unsigned int __V) 284{ 285 __builtin_ia32_wrfsbase32(__V); 286} 287 288static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 289_writefsbase_u64(unsigned long long __V) 290{ 291 __builtin_ia32_wrfsbase64(__V); 292} 293 294static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 295_writegsbase_u32(unsigned int __V) 296{ 297 __builtin_ia32_wrgsbase32(__V); 298} 299 300static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 301_writegsbase_u64(unsigned long long __V) 302{ 303 __builtin_ia32_wrgsbase64(__V); 304} 305 306#endif 307#endif /* __FSGSBASE__ */ 308 309#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__) 310 311/* The structs used below are to force the load/store to be unaligned. This 312 * is accomplished with the __packed__ attribute. The __may_alias__ prevents 313 * tbaa metadata from being generated based on the struct and the type of the 314 * field inside of it. 
315 */ 316 317static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 318_loadbe_i16(void const * __P) { 319 struct __loadu_i16 { 320 short __v; 321 } __attribute__((__packed__, __may_alias__)); 322 return __builtin_bswap16(((struct __loadu_i16*)__P)->__v); 323} 324 325static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 326_storebe_i16(void * __P, short __D) { 327 struct __storeu_i16 { 328 short __v; 329 } __attribute__((__packed__, __may_alias__)); 330 ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D); 331} 332 333static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 334_loadbe_i32(void const * __P) { 335 struct __loadu_i32 { 336 int __v; 337 } __attribute__((__packed__, __may_alias__)); 338 return __builtin_bswap32(((struct __loadu_i32*)__P)->__v); 339} 340 341static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 342_storebe_i32(void * __P, int __D) { 343 struct __storeu_i32 { 344 int __v; 345 } __attribute__((__packed__, __may_alias__)); 346 ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D); 347} 348 349#ifdef __x86_64__ 350static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 351_loadbe_i64(void const * __P) { 352 struct __loadu_i64 { 353 long long __v; 354 } __attribute__((__packed__, __may_alias__)); 355 return __builtin_bswap64(((struct __loadu_i64*)__P)->__v); 356} 357 358static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 359_storebe_i64(void * __P, long long __D) { 360 struct __storeu_i64 { 361 long long __v; 362 } __attribute__((__packed__, __may_alias__)); 363 ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D); 364} 365#endif 366#endif /* __MOVBE */ 367 368#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__) 369#include <rtmintrin.h> 370#include <xtestintrin.h> 371#endif 372 373#if 
!defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__) 374#include <shaintrin.h> 375#endif 376 377#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__) 378#include <fxsrintrin.h> 379#endif 380 381#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__) 382#include <xsaveintrin.h> 383#endif 384 385#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__) 386#include <xsaveoptintrin.h> 387#endif 388 389#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__) 390#include <xsavecintrin.h> 391#endif 392 393#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__) 394#include <xsavesintrin.h> 395#endif 396 397#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__) 398#include <cetintrin.h> 399#endif 400 401/* Some intrinsics inside adxintrin.h are available only on processors with ADX, 402 * whereas others are also available at all times. */ 403#include <adxintrin.h> 404 405#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__) 406#include <rdseedintrin.h> 407#endif 408 409#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__) 410#include <wbnoinvdintrin.h> 411#endif 412 413#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__) 414#include <cldemoteintrin.h> 415#endif 416 417#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__) 418#include <waitpkgintrin.h> 419#endif 420 421#if !defined(_MSC_VER) || __has_feature(modules) || \ 422 defined(__MOVDIRI__) || defined(__MOVDIR64B__) 423#include <movdirintrin.h> 424#endif 425 426#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__) 427#include <pconfigintrin.h> 428#endif 429 430#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__) 431#include <sgxintrin.h> 432#endif 433 434#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__) 435#include <ptwriteintrin.h> 436#endif 437 438#if !defined(_MSC_VER) || 
__has_feature(modules) || defined(__INVPCID__) 439#include <invpcidintrin.h> 440#endif 441 442#ifdef _MSC_VER 443/* Define the default attributes for these intrinsics */ 444#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) 445#ifdef __cplusplus 446extern "C" { 447#endif 448/*----------------------------------------------------------------------------*\ 449|* Interlocked Exchange HLE 450\*----------------------------------------------------------------------------*/ 451#if defined(__i386__) || defined(__x86_64__) 452static __inline__ long __DEFAULT_FN_ATTRS 453_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) { 454 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1" 455 : "+r" (_Value), "+m" (*_Target) :: "memory"); 456 return _Value; 457} 458static __inline__ long __DEFAULT_FN_ATTRS 459_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) { 460 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1" 461 : "+r" (_Value), "+m" (*_Target) :: "memory"); 462 return _Value; 463} 464#endif 465#if defined(__x86_64__) 466static __inline__ __int64 __DEFAULT_FN_ATTRS 467_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) { 468 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1" 469 : "+r" (_Value), "+m" (*_Target) :: "memory"); 470 return _Value; 471} 472static __inline__ __int64 __DEFAULT_FN_ATTRS 473_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) { 474 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1" 475 : "+r" (_Value), "+m" (*_Target) :: "memory"); 476 return _Value; 477} 478#endif 479/*----------------------------------------------------------------------------*\ 480|* Interlocked Compare Exchange HLE 481\*----------------------------------------------------------------------------*/ 482#if defined(__i386__) || defined(__x86_64__) 483static __inline__ long __DEFAULT_FN_ATTRS 484_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination, 485 
long _Exchange, long _Comparand) { 486 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1" 487 : "+a" (_Comparand), "+m" (*_Destination) 488 : "r" (_Exchange) : "memory"); 489 return _Comparand; 490} 491static __inline__ long __DEFAULT_FN_ATTRS 492_InterlockedCompareExchange_HLERelease(long volatile *_Destination, 493 long _Exchange, long _Comparand) { 494 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1" 495 : "+a" (_Comparand), "+m" (*_Destination) 496 : "r" (_Exchange) : "memory"); 497 return _Comparand; 498} 499#endif 500#if defined(__x86_64__) 501static __inline__ __int64 __DEFAULT_FN_ATTRS 502_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination, 503 __int64 _Exchange, __int64 _Comparand) { 504 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1" 505 : "+a" (_Comparand), "+m" (*_Destination) 506 : "r" (_Exchange) : "memory"); 507 return _Comparand; 508} 509static __inline__ __int64 __DEFAULT_FN_ATTRS 510_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination, 511 __int64 _Exchange, __int64 _Comparand) { 512 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1" 513 : "+a" (_Comparand), "+m" (*_Destination) 514 : "r" (_Exchange) : "memory"); 515 return _Comparand; 516} 517#endif 518#ifdef __cplusplus 519} 520#endif 521 522#undef __DEFAULT_FN_ATTRS 523 524#endif /* _MSC_VER */ 525 526#endif /* __IMMINTRIN_H */ 527