/*===---- immintrin.h - Intel intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#define __IMMINTRIN_H

/* Umbrella header: pulls in the per-feature intrinsic headers and defines a
 * few small intrinsics inline.
 *
 * Gating pattern used throughout this file:
 *   - when _MSC_VER is not defined, every section is compiled;
 *   - when _MSC_VER is defined, a section is compiled only if modules are
 *     enabled or the matching feature macro(s) are defined.
 * Headers included without a guard carry their own checks (see the notes next
 * to those includes).
 */

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
#include <mmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
#include <xmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
#include <emmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
#include <pmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
#include <tmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__SSE4_2__) || defined(__SSE4_1__))
#include <smmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AES__) || defined(__PCLMUL__))
#include <wmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
#include <clflushoptintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
#include <clwbintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
#include <avxintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
#include <avx2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
#include <f16cintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
#include <vpclmulqdqintrin.h>
#endif

/* No feature check desired due to internal checks */
#include <bmiintrin.h>

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
#include <bmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
#include <lzcntintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
#include <popcntintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
#include <fmaintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
#include <avx512fintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
#include <avx512vlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
#include <avx512bwintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
#include <avx512bitalgintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
#include <avx512cdintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
#include <avx512vpopcntdqintrin.h>
#endif

/* The combined "VL" headers below require both feature macros when the MSVC
 * gating applies (note the && inside the parentheses). */
#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
#include <avx512vpopcntdqvlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
#include <avx512vnniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VNNI__))
#include <avx512vlvnniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
#include <avx512dqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BITALG__))
#include <avx512vlbitalgintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BW__))
#include <avx512vlbwintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512CD__))
#include <avx512vlcdintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512DQ__))
#include <avx512vldqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
#include <avx512erintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
#include <avx512ifmaintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512IFMA__) && defined(__AVX512VL__))
#include <avx512ifmavlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
#include <avx512vbmiintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VBMI__) && defined(__AVX512VL__))
#include <avx512vbmivlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)
#include <avx512vbmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
#include <avx512vlvbmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
#include <avx512pfintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BF16__)
#include <avx512bf16intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BF16__))
#include <avx512vlbf16intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
#include <pkuintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
#include <vaesintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)
#include <gfniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The unsigned int produced by __builtin_ia32_rdpid().
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
  return __builtin_ia32_rdpid();
}
#endif // __RDPID__

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
/// Forwards \a __p to __builtin_ia32_rdrand16_step and returns the builtin's
/// int result unchanged.
///
/// NOTE(review): presumably stores a 16-bit hardware random value through
/// \a __p and returns nonzero on success, per the RDRAND instruction --
/// confirm against the Intel Intrinsics Guide.
///
/// \param __p
///    Pointer handed to the builtin; must be valid for a write of an
///    unsigned short (assumed from the parameter type -- confirm).
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  return __builtin_ia32_rdrand16_step(__p);
}

/// 32-bit counterpart of _rdrand16_step: forwards \a __p to
/// __builtin_ia32_rdrand32_step and returns the builtin's int result.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  return __builtin_ia32_rdrand32_step(__p);
}

/* The 64-bit variant is only declared when targeting x86-64. */
#ifdef __x86_64__
/// 64-bit counterpart of _rdrand16_step: forwards \a __p to
/// __builtin_ia32_rdrand64_step and returns the builtin's int result.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
  return __builtin_ia32_rdrand64_step(__p);
}
#endif /* __x86_64__ */
#endif /* __RDRND__ */

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
/* All FS/GS base accessors are declared only when targeting x86-64. Each one
 * is a direct wrapper around the like-named __builtin_ia32_{rd,wr}{fs,gs}base
 * builtin.
 * NOTE(review): presumably these map to the RDFSBASE/RDGSBASE/WRFSBASE/
 * WRGSBASE instructions -- confirm against the Intel Intrinsics Guide. */
#ifdef __x86_64__
/// Returns the unsigned int produced by __builtin_ia32_rdfsbase32().
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  return __builtin_ia32_rdfsbase32();
}

/// Returns the unsigned long long produced by __builtin_ia32_rdfsbase64().
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  return __builtin_ia32_rdfsbase64();
}

/// Returns the unsigned int produced by __builtin_ia32_rdgsbase32().
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  return __builtin_ia32_rdgsbase32();
}

/// Returns the unsigned long long produced by __builtin_ia32_rdgsbase64().
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  return __builtin_ia32_rdgsbase64();
}

/// Passes \a __V to __builtin_ia32_wrfsbase32(); returns nothing.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrfsbase32(__V);
}

/// Passes \a __V to __builtin_ia32_wrfsbase64(); returns nothing.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrfsbase64(__V);
}

/// Passes \a __V to __builtin_ia32_wrgsbase32(); returns nothing.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrgsbase32(__V);
}

/// Passes \a __V to __builtin_ia32_wrgsbase64(); returns nothing.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}

#endif /* __x86_64__ */
#endif /* __FSGSBASE__ */

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__)

/* The structs used below are to force the load/store to be unaligned. This
 * is accomplished with the __packed__ attribute. The __may_alias__ prevents
 * tbaa metadata from being generated based on the struct and the type of the
 * field inside of it.
 *
 * Each helper is written as a plain load or store combined with a
 * __builtin_bswapNN call.
 * NOTE(review): presumably the backend folds that pair into a single MOVBE
 * instruction when the "movbe" target feature is enabled -- confirm.
 */

/// Reads a short from \a __P through a packed, may_alias wrapper struct and
/// returns it with its bytes reversed by __builtin_bswap16.
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  struct __loadu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
}

/// Byte-reverses \a __D with __builtin_bswap16 and writes the result to
/// \a __P through a packed, may_alias wrapper struct.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  struct __storeu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D);
}

/// Reads an int from \a __P through a packed, may_alias wrapper struct and
/// returns it with its bytes reversed by __builtin_bswap32.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  struct __loadu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
}

/// Byte-reverses \a __D with __builtin_bswap32 and writes the result to
/// \a __P through a packed, may_alias wrapper struct.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  struct __storeu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D);
}

/* The 64-bit variants are only declared when targeting x86-64. */
#ifdef __x86_64__
/// Reads a long long from \a __P through a packed, may_alias wrapper struct
/// and returns it with its bytes reversed by __builtin_bswap64.
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  struct __loadu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
}

/// Byte-reverses \a __D with __builtin_bswap64 and writes the result to
/// \a __P through a packed, may_alias wrapper struct.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  struct __storeu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D);
}
#endif /* __x86_64__ */
#endif /* __MOVBE__ */

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
#include <rtmintrin.h>
#include <xtestintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
#include <shaintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
#include <fxsrintrin.h>
#endif

/* No feature check desired due to internal MSC_VER checks */
#include <xsaveintrin.h>

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
#include <xsaveoptintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
#include <xsavecintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
#include <xsavesintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
#include <cetintrin.h>
#endif

/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
 * whereas others are also available at all times. */
#include <adxintrin.h>

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)
#include <rdseedintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)
#include <wbnoinvdintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__)
#include <cldemoteintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__)
#include <waitpkgintrin.h>
#endif

/* Either MOVDIRI or MOVDIR64B is enough to pull in the shared header. */
#if !defined(_MSC_VER) || __has_feature(modules) || \
    defined(__MOVDIRI__) || defined(__MOVDIR64B__)
#include <movdirintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__)
#include <pconfigintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__)
#include <sgxintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__)
#include <ptwriteintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__)
#include <invpcidintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    defined(__AVX512VP2INTERSECT__)
#include <avx512vp2intersectintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
#include <avx512vlvp2intersectintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__ENQCMD__)
#include <enqcmdintrin.h>
#endif

/* The HLE interlocked helpers below are only provided when _MSC_VER is defined
 * and the compiler supports GNU-style inline asm, which their bodies use.
 *
 * Each helper emits a raw prefix byte (0xf2 for the *_HLEAcquire variants,
 * 0xf3 for the *_HLERelease variants) in front of a locked xchg/cmpxchg.
 * NOTE(review): 0xf2/0xf3 are presumably the XACQUIRE/XRELEASE prefixes --
 * confirm against the Intel SDM.
 * Every asm statement lists "memory" as a clobber. */
#if defined(_MSC_VER) && __has_extension(gnu_asm)
/* Define the default attributes for these intrinsics */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#ifdef __cplusplus
extern "C" {
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange HLE
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
/* Exchanges _Value (register operand) with *_Target (memory operand) using a
 * 0xf2-prefixed `lock xchg`, then returns _Value as updated by the asm, i.e.
 * what xchg left in the register. */
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
/* Same as _InterlockedExchange_HLEAcquire but with a 0xf3 prefix byte. */
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
#endif /* __i386__ || __x86_64__ */
#if defined(__x86_64__)
/* __int64 variant of _InterlockedExchange_HLEAcquire (0xf2 prefix byte);
 * x86-64 only. */
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
/* __int64 variant of _InterlockedExchange_HLERelease (0xf3 prefix byte);
 * x86-64 only. */
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
#endif /* __x86_64__ */
/*----------------------------------------------------------------------------*\
|* Interlocked Compare Exchange HLE
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
/* Runs a 0xf2-prefixed `lock cmpxchg` with _Exchange as the source register
 * and *_Destination as the memory operand. _Comparand is bound to the "a"
 * (accumulator) constraint as a read-write operand, and its value after the
 * instruction is returned. */
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
                              long _Exchange, long _Comparand) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
/* Same as _InterlockedCompareExchange_HLEAcquire but with a 0xf3 prefix
 * byte. */
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
                              long _Exchange, long _Comparand) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
#endif /* __i386__ || __x86_64__ */
#if defined(__x86_64__)
/* __int64 variant of _InterlockedCompareExchange_HLEAcquire (0xf2 prefix
 * byte); x86-64 only. */
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
                              __int64 _Exchange, __int64 _Comparand) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
/* __int64 variant of _InterlockedCompareExchange_HLERelease (0xf3 prefix
 * byte); x86-64 only. */
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
                              __int64 _Exchange, __int64 _Comparand) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
#endif /* __x86_64__ */
#ifdef __cplusplus
}
#endif

/* Keep the helper macro private to this header. */
#undef __DEFAULT_FN_ATTRS

#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */

#endif /* __IMMINTRIN_H */