/* immintrin.h revision 355940 */
1/*===---- immintrin.h - Intel intrinsics -----------------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10#ifndef __IMMINTRIN_H 11#define __IMMINTRIN_H 12 13#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__) 14#include <mmintrin.h> 15#endif 16 17#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__) 18#include <xmmintrin.h> 19#endif 20 21#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__) 22#include <emmintrin.h> 23#endif 24 25#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__) 26#include <pmmintrin.h> 27#endif 28 29#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__) 30#include <tmmintrin.h> 31#endif 32 33#if !defined(_MSC_VER) || __has_feature(modules) || \ 34 (defined(__SSE4_2__) || defined(__SSE4_1__)) 35#include <smmintrin.h> 36#endif 37 38#if !defined(_MSC_VER) || __has_feature(modules) || \ 39 (defined(__AES__) || defined(__PCLMUL__)) 40#include <wmmintrin.h> 41#endif 42 43#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__) 44#include <clflushoptintrin.h> 45#endif 46 47#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__) 48#include <clwbintrin.h> 49#endif 50 51#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__) 52#include <avxintrin.h> 53#endif 54 55#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__) 56#include <avx2intrin.h> 57#endif 58 59#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__) 60#include <f16cintrin.h> 61#endif 62 63#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__) 64#include <vpclmulqdqintrin.h> 65#endif 66 67#if !defined(_MSC_VER) || __has_feature(modules) || 
defined(__BMI__) 68#include <bmiintrin.h> 69#endif 70 71#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__) 72#include <bmi2intrin.h> 73#endif 74 75#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__) 76#include <lzcntintrin.h> 77#endif 78 79#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__) 80#include <popcntintrin.h> 81#endif 82 83#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__) 84#include <fmaintrin.h> 85#endif 86 87#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__) 88#include <avx512fintrin.h> 89#endif 90 91#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__) 92#include <avx512vlintrin.h> 93#endif 94 95#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__) 96#include <avx512bwintrin.h> 97#endif 98 99#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__) 100#include <avx512bitalgintrin.h> 101#endif 102 103#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__) 104#include <avx512cdintrin.h> 105#endif 106 107#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__) 108#include <avx512vpopcntdqintrin.h> 109#endif 110 111#if !defined(_MSC_VER) || __has_feature(modules) || \ 112 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)) 113#include <avx512vpopcntdqvlintrin.h> 114#endif 115 116#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__) 117#include <avx512vnniintrin.h> 118#endif 119 120#if !defined(_MSC_VER) || __has_feature(modules) || \ 121 (defined(__AVX512VL__) && defined(__AVX512VNNI__)) 122#include <avx512vlvnniintrin.h> 123#endif 124 125#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__) 126#include <avx512dqintrin.h> 127#endif 128 129#if !defined(_MSC_VER) || __has_feature(modules) || \ 130 (defined(__AVX512VL__) && defined(__AVX512BITALG__)) 131#include <avx512vlbitalgintrin.h> 132#endif 133 134#if 
!defined(_MSC_VER) || __has_feature(modules) || \ 135 (defined(__AVX512VL__) && defined(__AVX512BW__)) 136#include <avx512vlbwintrin.h> 137#endif 138 139#if !defined(_MSC_VER) || __has_feature(modules) || \ 140 (defined(__AVX512VL__) && defined(__AVX512CD__)) 141#include <avx512vlcdintrin.h> 142#endif 143 144#if !defined(_MSC_VER) || __has_feature(modules) || \ 145 (defined(__AVX512VL__) && defined(__AVX512DQ__)) 146#include <avx512vldqintrin.h> 147#endif 148 149#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__) 150#include <avx512erintrin.h> 151#endif 152 153#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__) 154#include <avx512ifmaintrin.h> 155#endif 156 157#if !defined(_MSC_VER) || __has_feature(modules) || \ 158 (defined(__AVX512IFMA__) && defined(__AVX512VL__)) 159#include <avx512ifmavlintrin.h> 160#endif 161 162#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__) 163#include <avx512vbmiintrin.h> 164#endif 165 166#if !defined(_MSC_VER) || __has_feature(modules) || \ 167 (defined(__AVX512VBMI__) && defined(__AVX512VL__)) 168#include <avx512vbmivlintrin.h> 169#endif 170 171#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__) 172#include <avx512vbmi2intrin.h> 173#endif 174 175#if !defined(_MSC_VER) || __has_feature(modules) || \ 176 (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) 177#include <avx512vlvbmi2intrin.h> 178#endif 179 180#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__) 181#include <avx512pfintrin.h> 182#endif 183 184#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BF16__) 185#include <avx512bf16intrin.h> 186#endif 187 188#if !defined(_MSC_VER) || __has_feature(modules) || \ 189 (defined(__AVX512VL__) && defined(__AVX512BF16__)) 190#include <avx512vlbf16intrin.h> 191#endif 192 193#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__) 194#include <pkuintrin.h> 195#endif 196 197#if !defined(_MSC_VER) 
|| __has_feature(modules) || defined(__VAES__) 198#include <vaesintrin.h> 199#endif 200 201#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__) 202#include <gfniintrin.h> 203#endif 204 205#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__) 206/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103). 207/// 208/// \headerfile <immintrin.h> 209/// 210/// This intrinsic corresponds to the <c> RDPID </c> instruction. 211static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid"))) 212_rdpid_u32(void) { 213 return __builtin_ia32_rdpid(); 214} 215#endif // __RDPID__ 216 217#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__) 218static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 219_rdrand16_step(unsigned short *__p) 220{ 221 return __builtin_ia32_rdrand16_step(__p); 222} 223 224static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 225_rdrand32_step(unsigned int *__p) 226{ 227 return __builtin_ia32_rdrand32_step(__p); 228} 229 230#ifdef __x86_64__ 231static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 232_rdrand64_step(unsigned long long *__p) 233{ 234 return __builtin_ia32_rdrand64_step(__p); 235} 236#endif 237#endif /* __RDRND__ */ 238 239#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__) 240#ifdef __x86_64__ 241static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 242_readfsbase_u32(void) 243{ 244 return __builtin_ia32_rdfsbase32(); 245} 246 247static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 248_readfsbase_u64(void) 249{ 250 return __builtin_ia32_rdfsbase64(); 251} 252 253static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 254_readgsbase_u32(void) 255{ 256 return 
__builtin_ia32_rdgsbase32(); 257} 258 259static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 260_readgsbase_u64(void) 261{ 262 return __builtin_ia32_rdgsbase64(); 263} 264 265static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 266_writefsbase_u32(unsigned int __V) 267{ 268 __builtin_ia32_wrfsbase32(__V); 269} 270 271static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 272_writefsbase_u64(unsigned long long __V) 273{ 274 __builtin_ia32_wrfsbase64(__V); 275} 276 277static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 278_writegsbase_u32(unsigned int __V) 279{ 280 __builtin_ia32_wrgsbase32(__V); 281} 282 283static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 284_writegsbase_u64(unsigned long long __V) 285{ 286 __builtin_ia32_wrgsbase64(__V); 287} 288 289#endif 290#endif /* __FSGSBASE__ */ 291 292#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__) 293 294/* The structs used below are to force the load/store to be unaligned. This 295 * is accomplished with the __packed__ attribute. The __may_alias__ prevents 296 * tbaa metadata from being generated based on the struct and the type of the 297 * field inside of it. 
298 */ 299 300static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 301_loadbe_i16(void const * __P) { 302 struct __loadu_i16 { 303 short __v; 304 } __attribute__((__packed__, __may_alias__)); 305 return __builtin_bswap16(((struct __loadu_i16*)__P)->__v); 306} 307 308static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 309_storebe_i16(void * __P, short __D) { 310 struct __storeu_i16 { 311 short __v; 312 } __attribute__((__packed__, __may_alias__)); 313 ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D); 314} 315 316static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 317_loadbe_i32(void const * __P) { 318 struct __loadu_i32 { 319 int __v; 320 } __attribute__((__packed__, __may_alias__)); 321 return __builtin_bswap32(((struct __loadu_i32*)__P)->__v); 322} 323 324static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 325_storebe_i32(void * __P, int __D) { 326 struct __storeu_i32 { 327 int __v; 328 } __attribute__((__packed__, __may_alias__)); 329 ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D); 330} 331 332#ifdef __x86_64__ 333static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 334_loadbe_i64(void const * __P) { 335 struct __loadu_i64 { 336 long long __v; 337 } __attribute__((__packed__, __may_alias__)); 338 return __builtin_bswap64(((struct __loadu_i64*)__P)->__v); 339} 340 341static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) 342_storebe_i64(void * __P, long long __D) { 343 struct __storeu_i64 { 344 long long __v; 345 } __attribute__((__packed__, __may_alias__)); 346 ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D); 347} 348#endif 349#endif /* __MOVBE */ 350 351#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__) 352#include <rtmintrin.h> 353#include <xtestintrin.h> 354#endif 355 356#if 
!defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__) 357#include <shaintrin.h> 358#endif 359 360#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__) 361#include <fxsrintrin.h> 362#endif 363 364/* No feature check desired due to internal MSC_VER checks */ 365#include <xsaveintrin.h> 366 367#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__) 368#include <xsaveoptintrin.h> 369#endif 370 371#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__) 372#include <xsavecintrin.h> 373#endif 374 375#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__) 376#include <xsavesintrin.h> 377#endif 378 379#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__) 380#include <cetintrin.h> 381#endif 382 383/* Some intrinsics inside adxintrin.h are available only on processors with ADX, 384 * whereas others are also available at all times. */ 385#include <adxintrin.h> 386 387#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__) 388#include <rdseedintrin.h> 389#endif 390 391#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__) 392#include <wbnoinvdintrin.h> 393#endif 394 395#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__) 396#include <cldemoteintrin.h> 397#endif 398 399#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__) 400#include <waitpkgintrin.h> 401#endif 402 403#if !defined(_MSC_VER) || __has_feature(modules) || \ 404 defined(__MOVDIRI__) || defined(__MOVDIR64B__) 405#include <movdirintrin.h> 406#endif 407 408#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__) 409#include <pconfigintrin.h> 410#endif 411 412#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__) 413#include <sgxintrin.h> 414#endif 415 416#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__) 417#include <ptwriteintrin.h> 418#endif 419 420#if !defined(_MSC_VER) || __has_feature(modules) || 
defined(__INVPCID__) 421#include <invpcidintrin.h> 422#endif 423 424#if !defined(_MSC_VER) || __has_feature(modules) || \ 425 defined(__AVX512VP2INTERSECT__) 426#include <avx512vp2intersectintrin.h> 427#endif 428 429#if !defined(_MSC_VER) || __has_feature(modules) || \ 430 (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__)) 431#include <avx512vlvp2intersectintrin.h> 432#endif 433 434#if !defined(_MSC_VER) || __has_feature(modules) || defined(__ENQCMD__) 435#include <enqcmdintrin.h> 436#endif 437 438#if defined(_MSC_VER) && __has_extension(gnu_asm) 439/* Define the default attributes for these intrinsics */ 440#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) 441#ifdef __cplusplus 442extern "C" { 443#endif 444/*----------------------------------------------------------------------------*\ 445|* Interlocked Exchange HLE 446\*----------------------------------------------------------------------------*/ 447#if defined(__i386__) || defined(__x86_64__) 448static __inline__ long __DEFAULT_FN_ATTRS 449_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) { 450 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1" 451 : "+r" (_Value), "+m" (*_Target) :: "memory"); 452 return _Value; 453} 454static __inline__ long __DEFAULT_FN_ATTRS 455_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) { 456 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1" 457 : "+r" (_Value), "+m" (*_Target) :: "memory"); 458 return _Value; 459} 460#endif 461#if defined(__x86_64__) 462static __inline__ __int64 __DEFAULT_FN_ATTRS 463_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) { 464 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1" 465 : "+r" (_Value), "+m" (*_Target) :: "memory"); 466 return _Value; 467} 468static __inline__ __int64 __DEFAULT_FN_ATTRS 469_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) { 470 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1" 471 : 
"+r" (_Value), "+m" (*_Target) :: "memory"); 472 return _Value; 473} 474#endif 475/*----------------------------------------------------------------------------*\ 476|* Interlocked Compare Exchange HLE 477\*----------------------------------------------------------------------------*/ 478#if defined(__i386__) || defined(__x86_64__) 479static __inline__ long __DEFAULT_FN_ATTRS 480_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination, 481 long _Exchange, long _Comparand) { 482 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1" 483 : "+a" (_Comparand), "+m" (*_Destination) 484 : "r" (_Exchange) : "memory"); 485 return _Comparand; 486} 487static __inline__ long __DEFAULT_FN_ATTRS 488_InterlockedCompareExchange_HLERelease(long volatile *_Destination, 489 long _Exchange, long _Comparand) { 490 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1" 491 : "+a" (_Comparand), "+m" (*_Destination) 492 : "r" (_Exchange) : "memory"); 493 return _Comparand; 494} 495#endif 496#if defined(__x86_64__) 497static __inline__ __int64 __DEFAULT_FN_ATTRS 498_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination, 499 __int64 _Exchange, __int64 _Comparand) { 500 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1" 501 : "+a" (_Comparand), "+m" (*_Destination) 502 : "r" (_Exchange) : "memory"); 503 return _Comparand; 504} 505static __inline__ __int64 __DEFAULT_FN_ATTRS 506_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination, 507 __int64 _Exchange, __int64 _Comparand) { 508 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1" 509 : "+a" (_Comparand), "+m" (*_Destination) 510 : "r" (_Exchange) : "memory"); 511 return _Comparand; 512} 513#endif 514#ifdef __cplusplus 515} 516#endif 517 518#undef __DEFAULT_FN_ATTRS 519 520#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */ 521 522#endif /* __IMMINTRIN_H */ 523