/* ===-------- Intrin.h ---------------------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

/* Only include this if we're compiling for the windows platform (i.e. the
   MS-compatible compiler mode); otherwise defer to any other Intrin.h that
   appears later on the include search path. */
#ifndef _MSC_VER
#include_next <Intrin.h>
#else

#ifndef __INTRIN_H
#define __INTRIN_H

/* First include the standard intrinsics. */
#include <x86intrin.h>

#ifdef __cplusplus
extern "C" {
#endif

/* And the random ones that aren't in those files. */
__m64 _m_from_float(float);
__m64 _m_from_int(int _l);
void _m_prefetch(void *);
float _m_to_float(__m64);
int _m_to_int(__m64 _M);

/* Other assorted instruction intrinsics.
*/ 47void __addfsbyte(unsigned long, unsigned char); 48void __addfsdword(unsigned long, unsigned long); 49void __addfsword(unsigned long, unsigned short); 50void __code_seg(const char *); 51void __cpuid(int[4], int); 52void __cpuidex(int[4], int, int); 53void __debugbreak(void); 54__int64 __emul(int, int); 55unsigned __int64 __emulu(unsigned int, unsigned int); 56void __cdecl __fastfail(unsigned int); 57unsigned int __getcallerseflags(void); 58void __halt(void); 59unsigned char __inbyte(unsigned short); 60void __inbytestring(unsigned short, unsigned char *, unsigned long); 61void __incfsbyte(unsigned long); 62void __incfsdword(unsigned long); 63void __incfsword(unsigned long); 64unsigned long __indword(unsigned short); 65void __indwordstring(unsigned short, unsigned long *, unsigned long); 66void __int2c(void); 67void __invlpg(void *); 68unsigned short __inword(unsigned short); 69void __inwordstring(unsigned short, unsigned short *, unsigned long); 70void __lidt(void *); 71unsigned __int64 __ll_lshift(unsigned __int64, int); 72__int64 __ll_rshift(__int64, int); 73void __llwpcb(void *); 74unsigned char __lwpins32(unsigned int, unsigned int, unsigned int); 75void __lwpval32(unsigned int, unsigned int, unsigned int); 76unsigned int __lzcnt(unsigned int); 77unsigned short __lzcnt16(unsigned short); 78void __movsb(unsigned char *, unsigned char const *, size_t); 79void __movsd(unsigned long *, unsigned long const *, size_t); 80void __movsw(unsigned short *, unsigned short const *, size_t); 81void __nop(void); 82void __nvreg_restore_fence(void); 83void __nvreg_save_fence(void); 84void __outbyte(unsigned short, unsigned char); 85void __outbytestring(unsigned short, unsigned char *, unsigned long); 86void __outdword(unsigned short, unsigned long); 87void __outdwordstring(unsigned short, unsigned long *, unsigned long); 88void __outword(unsigned short, unsigned short); 89void __outwordstring(unsigned short, unsigned short *, unsigned long); 90static __inline__ 91unsigned 
int __popcnt(unsigned int); 92static __inline__ 93unsigned short __popcnt16(unsigned short); 94unsigned __int64 __rdtsc(void); 95unsigned __int64 __rdtscp(unsigned int *); 96unsigned long __readcr0(void); 97unsigned long __readcr2(void); 98unsigned long __readcr3(void); 99unsigned long __readcr5(void); 100unsigned long __readcr8(void); 101unsigned int __readdr(unsigned int); 102unsigned int __readeflags(void); 103unsigned char __readfsbyte(unsigned long); 104unsigned long __readfsdword(unsigned long); 105unsigned __int64 __readfsqword(unsigned long); 106unsigned short __readfsword(unsigned long); 107unsigned __int64 __readmsr(unsigned long); 108unsigned __int64 __readpmc(unsigned long); 109unsigned long __segmentlimit(unsigned long); 110void __sidt(void *); 111void *__slwpcb(void); 112void __stosb(unsigned char *, unsigned char, size_t); 113void __stosd(unsigned long *, unsigned long, size_t); 114void __stosw(unsigned short *, unsigned short, size_t); 115void __svm_clgi(void); 116void __svm_invlpga(void *, int); 117void __svm_skinit(int); 118void __svm_stgi(void); 119void __svm_vmload(size_t); 120void __svm_vmrun(size_t); 121void __svm_vmsave(size_t); 122void __ud2(void); 123unsigned __int64 __ull_rshift(unsigned __int64, int); 124void __vmx_off(void); 125void __vmx_vmptrst(unsigned __int64 *); 126void __wbinvd(void); 127void __writecr0(unsigned int); 128void __writecr3(unsigned int); 129void __writecr4(unsigned int); 130void __writecr8(unsigned int); 131void __writedr(unsigned int, unsigned int); 132void __writeeflags(unsigned int); 133void __writefsbyte(unsigned long, unsigned char); 134void __writefsdword(unsigned long, unsigned long); 135void __writefsqword(unsigned long, unsigned __int64); 136void __writefsword(unsigned long, unsigned short); 137void __writemsr(unsigned long, unsigned __int64); 138static __inline__ 139void *_AddressOfReturnAddress(void); 140unsigned int _andn_u32(unsigned int, unsigned int); 141unsigned int _bextr_u32(unsigned int, unsigned 
int, unsigned int); 142unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int); 143unsigned int _bextri_u32(unsigned int, unsigned int); 144static __inline__ 145unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); 146static __inline__ 147unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); 148static __inline__ 149unsigned char _bittest(long const *, long); 150static __inline__ 151unsigned char _bittestandcomplement(long *, long); 152static __inline__ 153unsigned char _bittestandreset(long *, long); 154static __inline__ 155unsigned char _bittestandset(long *, long); 156unsigned int _blcfill_u32(unsigned int); 157unsigned int _blci_u32(unsigned int); 158unsigned int _blcic_u32(unsigned int); 159unsigned int _blcmsk_u32(unsigned int); 160unsigned int _blcs_u32(unsigned int); 161unsigned int _blsfill_u32(unsigned int); 162unsigned int _blsi_u32(unsigned int); 163unsigned int _blsic_u32(unsigned int); 164unsigned int _blsmsk_u32(unsigned int); 165unsigned int _blsmsk_u32(unsigned int); 166unsigned int _blsr_u32(unsigned int); 167unsigned int _blsr_u32(unsigned int); 168unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64); 169unsigned long __cdecl _byteswap_ulong(unsigned long); 170unsigned short __cdecl _byteswap_ushort(unsigned short); 171unsigned _bzhi_u32(unsigned int, unsigned int); 172void __cdecl _disable(void); 173void __cdecl _enable(void); 174void __cdecl _fxrstor(void const *); 175void __cdecl _fxsave(void *); 176long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value); 177static __inline__ 178long _InterlockedAnd(long volatile *_Value, long _Mask); 179static __inline__ 180short _InterlockedAnd16(short volatile *_Value, short _Mask); 181static __inline__ 182char _InterlockedAnd8(char volatile *_Value, char _Mask); 183unsigned char _interlockedbittestandreset(long volatile *, long); 184unsigned char _interlockedbittestandset(long volatile *, long); 185static __inline__ 186long 
__cdecl _InterlockedCompareExchange(long volatile *_Destination, 187 long _Exchange, long _Comparand); 188long _InterlockedCompareExchange_HLEAcquire(long volatile *, long, long); 189long _InterlockedCompareExchange_HLERelease(long volatile *, long, long); 190static __inline__ 191short _InterlockedCompareExchange16(short volatile *_Destination, 192 short _Exchange, short _Comparand); 193static __inline__ 194__int64 _InterlockedCompareExchange64(__int64 volatile *_Destination, 195 __int64 _Exchange, __int64 _Comparand); 196__int64 _InterlockedcompareExchange64_HLEAcquire(__int64 volatile *, __int64, 197 __int64); 198__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64, 199 __int64); 200static __inline__ 201char _InterlockedCompareExchange8(char volatile *_Destination, char _Exchange, 202 char _Comparand); 203void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *, 204 void *); 205void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *, 206 void *); 207static __inline__ 208long __cdecl _InterlockedDecrement(long volatile *_Addend); 209static __inline__ 210short _InterlockedDecrement16(short volatile *_Addend); 211static __inline__ 212long __cdecl _InterlockedExchange(long volatile *_Target, long _Value); 213static __inline__ 214short _InterlockedExchange16(short volatile *_Target, short _Value); 215static __inline__ 216char _InterlockedExchange8(char volatile *_Target, char _Value); 217static __inline__ 218long __cdecl _InterlockedExchangeAdd(long volatile *_Addend, long _Value); 219long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long); 220long _InterlockedExchangeAdd_HLERelease(long volatile *, long); 221static __inline__ 222char _InterlockedExchangeAdd8(char volatile *_Addend, char _Value); 223static __inline__ 224long __cdecl _InterlockedIncrement(long volatile *_Addend); 225static __inline__ 226short _InterlockedIncrement16(short volatile *_Addend); 227static __inline__ 228long 
_InterlockedOr(long volatile *_Value, long _Mask); 229static __inline__ 230short _InterlockedOr16(short volatile *_Value, short _Mask); 231static __inline__ 232char _InterlockedOr8(char volatile *_Value, char _Mask); 233static __inline__ 234long _InterlockedXor(long volatile *_Value, long _Mask); 235static __inline__ 236short _InterlockedXor16(short volatile *_Value, short _Mask); 237static __inline__ 238char _InterlockedXor8(char volatile *_Value, char _Mask); 239void __cdecl _invpcid(unsigned int, void *); 240static __inline__ 241unsigned long __cdecl _lrotl(unsigned long, int); 242static __inline__ 243unsigned long __cdecl _lrotr(unsigned long, int); 244static __inline__ 245unsigned int _lzcnt_u32(unsigned int); 246static __inline__ 247void _ReadBarrier(void); 248static __inline__ 249void _ReadWriteBarrier(void); 250static __inline__ 251void *_ReturnAddress(void); 252unsigned int _rorx_u32(unsigned int, const unsigned int); 253int __cdecl _rdrand16_step(unsigned short *); 254int __cdecl _rdrand32_step(unsigned int *); 255static __inline__ 256unsigned int __cdecl _rotl(unsigned int _Value, int _Shift); 257static __inline__ 258unsigned short _rotl16(unsigned short _Value, unsigned char _Shift); 259static __inline__ 260unsigned __int64 __cdecl _rotl64(unsigned __int64 _Value, int _Shift); 261static __inline__ 262unsigned char _rotl8(unsigned char _Value, unsigned char _Shift); 263static __inline__ 264unsigned int __cdecl _rotr(unsigned int _Value, int _Shift); 265static __inline__ 266unsigned short _rotr16(unsigned short _Value, unsigned char _Shift); 267static __inline__ 268unsigned __int64 __cdecl _rotr64(unsigned __int64 _Value, int _Shift); 269static __inline__ 270unsigned char _rotr8(unsigned char _Value, unsigned char _Shift); 271int _sarx_i32(int, unsigned int); 272 273/* FIXME: Need definition for jmp_buf. 
274 int __cdecl _setjmp(jmp_buf); */ 275 276unsigned int _shlx_u32(unsigned int, unsigned int); 277unsigned int _shrx_u32(unsigned int, unsigned int); 278void _Store_HLERelease(long volatile *, long); 279void _Store64_HLERelease(__int64 volatile *, __int64); 280void _StorePointer_HLERelease(void *volatile *, void *); 281unsigned int _t1mskc_u32(unsigned int); 282unsigned int _tzcnt_u32(unsigned int); 283unsigned int _tzcnt_u32(unsigned int); 284unsigned int _tzmsk_u32(unsigned int); 285static __inline__ 286void _WriteBarrier(void); 287void _xabort(const unsigned int imm); 288unsigned __int32 xbegin(void); 289void _xend(void); 290unsigned __int64 __cdecl _xgetbv(unsigned int); 291void __cdecl _xrstor(void const *, unsigned __int64); 292void __cdecl _xsave(void *, unsigned __int64); 293void __cdecl _xsaveopt(void *, unsigned __int64); 294void __cdecl _xsetbv(unsigned int, unsigned __int64); 295unsigned char _xtest(void); 296 297/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. 
*/ 298#ifdef __x86_64__ 299void __addgsbyte(unsigned long, unsigned char); 300void __addgsdword(unsigned long, unsigned long); 301void __addgsqword(unsigned long, unsigned __int64); 302void __addgsword(unsigned long, unsigned short); 303void __faststorefence(void); 304void __incgsbyte(unsigned long); 305void __incgsdword(unsigned long); 306void __incgsqword(unsigned long); 307void __incgsword(unsigned long); 308unsigned __int64 __popcnt64(unsigned __int64); 309unsigned __int64 __shiftleft128(unsigned __int64 _LowPart, 310 unsigned __int64 _HighPart, 311 unsigned char _Shift); 312unsigned __int64 __shiftright128(unsigned __int64 _LowPart, 313 unsigned __int64 _HighPart, 314 unsigned char _Shift); 315void __stosq(unsigned __int64 *, unsigned __int64, size_t); 316unsigned __int64 _andn_u64(unsigned __int64, unsigned __int64); 317unsigned __int64 _bextr_u64(unsigned __int64, unsigned int, unsigned int); 318unsigned __int64 _bextri_u64(unsigned __int64, unsigned int); 319static __inline__ 320unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); 321static __inline__ 322unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); 323static __inline__ 324unsigned char _bittest64(__int64 const *, __int64); 325static __inline__ 326unsigned char _bittestandcomplement64(__int64 *, __int64); 327static __inline__ 328unsigned char _bittestandreset64(__int64 *, __int64); 329static __inline__ 330unsigned char _bittestandset64(__int64 *, __int64); 331unsigned __int64 _blcfill_u64(unsigned __int64); 332unsigned __int64 _blci_u64(unsigned __int64); 333unsigned __int64 _blcic_u64(unsigned __int64); 334unsigned __int64 _blcmsk_u64(unsigned __int64); 335unsigned __int64 _blcs_u64(unsigned __int64); 336unsigned __int64 _blsfill_u64(unsigned __int64); 337unsigned __int64 _blsi_u64(unsigned __int64); 338unsigned __int64 _blsic_u64(unsigned __int64); 339unsigned __int64 _blmsk_u64(unsigned __int64); 340unsigned __int64 _blsr_u64(unsigned __int64); 
341unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64); 342unsigned __int64 _bzhi_u64(unsigned __int64, unsigned int); 343void __cdecl _fxrstor64(void const *); 344void __cdecl _fxsave64(void *); 345long _InterlockedAnd_np(long volatile *_Value, long _Mask); 346short _InterlockedAnd16_np(short volatile *_Value, short _Mask); 347__int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask); 348char _InterlockedAnd8_np(char volatile *_Value, char _Mask); 349unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64); 350unsigned char _interlockedbittestandset64(__int64 volatile *, __int64); 351long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange, 352 long _Comparand); 353unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination, 354 __int64 _ExchangeHigh, 355 __int64 _ExchangeLow, 356 __int64 *_CompareandResult); 357unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination, 358 __int64 _ExchangeHigh, 359 __int64 _ExchangeLow, 360 __int64 *_ComparandResult); 361short _InterlockedCompareExchange16_np(short volatile *_Destination, 362 short _Exchange, short _Comparand); 363__int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination, 364 __int64 _Exchange, __int64 _Comparand); 365void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination, 366 void *_Exchange, void *_Comparand); 367long _InterlockedOr_np(long volatile *_Value, long _Mask); 368short _InterlockedOr16_np(short volatile *_Value, short _Mask); 369__int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask); 370char _InterlockedOr8_np(char volatile *_Value, char _Mask); 371long _InterlockedXor_np(long volatile *_Value, long _Mask); 372short _InterlockedXor16_np(short volatile *_Value, short _Mask); 373__int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask); 374char _InterlockedXor8_np(char volatile *_Value, char _Mask); 375unsigned __int64 _lzcnt_u64(unsigned __int64); 
376__int64 _mul128(__int64 _Multiplier, __int64 _Multiplicand, 377 __int64 *_HighProduct); 378unsigned int __cdecl _readfsbase_u32(void); 379unsigned __int64 __cdecl _readfsbase_u64(void); 380unsigned int __cdecl _readgsbase_u32(void); 381unsigned __int64 __cdecl _readgsbase_u64(void); 382unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int); 383unsigned __int64 _tzcnt_u64(unsigned __int64); 384unsigned __int64 _tzmsk_u64(unsigned __int64); 385unsigned __int64 _umul128(unsigned __int64 _Multiplier, 386 unsigned __int64 _Multiplicand, 387 unsigned __int64 *_HighProduct); 388void __cdecl _writefsbase_u32(unsigned int); 389void _cdecl _writefsbase_u64(unsigned __int64); 390void __cdecl _writegsbase_u32(unsigned int); 391void __cdecl _writegsbase_u64(unsigned __int64); 392void __cdecl _xrstor64(void const *, unsigned __int64); 393void __cdecl _xsave64(void *, unsigned __int64); 394void __cdecl _xsaveopt64(void *, unsigned __int64); 395 396#endif /* __x86_64__ */ 397 398/*----------------------------------------------------------------------------*\ 399|* Bit Twiddling 400\*----------------------------------------------------------------------------*/ 401static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 402_rotl8(unsigned char _Value, unsigned char _Shift) { 403 _Shift &= 0x7; 404 return _Shift ? (_Value << _Shift) | (_Value >> (8 - _Shift)) : _Value; 405} 406static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 407_rotr8(unsigned char _Value, unsigned char _Shift) { 408 _Shift &= 0x7; 409 return _Shift ? (_Value >> _Shift) | (_Value << (8 - _Shift)) : _Value; 410} 411static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) 412_rotl16(unsigned short _Value, unsigned char _Shift) { 413 _Shift &= 0xf; 414 return _Shift ? 
(_Value << _Shift) | (_Value >> (16 - _Shift)) : _Value; 415} 416static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) 417_rotr16(unsigned short _Value, unsigned char _Shift) { 418 _Shift &= 0xf; 419 return _Shift ? (_Value >> _Shift) | (_Value << (16 - _Shift)) : _Value; 420} 421static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 422_rotl(unsigned int _Value, int _Shift) { 423 _Shift &= 0x1f; 424 return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value; 425} 426static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 427_rotr(unsigned int _Value, int _Shift) { 428 _Shift &= 0x1f; 429 return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value; 430} 431static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) 432_lrotl(unsigned long _Value, int _Shift) { 433 _Shift &= 0x1f; 434 return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value; 435} 436static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) 437_lrotr(unsigned long _Value, int _Shift) { 438 _Shift &= 0x1f; 439 return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value; 440} 441static 442__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) 443_rotl64(unsigned __int64 _Value, int _Shift) { 444 _Shift &= 0x3f; 445 return _Shift ? (_Value << _Shift) | (_Value >> (64 - _Shift)) : _Value; 446} 447static 448__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) 449_rotr64(unsigned __int64 _Value, int _Shift) { 450 _Shift &= 0x3f; 451 return _Shift ? 
(_Value >> _Shift) | (_Value << (64 - _Shift)) : _Value; 452} 453/*----------------------------------------------------------------------------*\ 454|* Bit Counting and Testing 455\*----------------------------------------------------------------------------*/ 456static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 457_BitScanForward(unsigned long *_Index, unsigned long _Mask) { 458 if (!_Mask) 459 return 0; 460 *_Index = __builtin_ctzl(_Mask); 461 return 1; 462} 463static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 464_BitScanReverse(unsigned long *_Index, unsigned long _Mask) { 465 if (!_Mask) 466 return 0; 467 *_Index = 31 - __builtin_clzl(_Mask); 468 return 1; 469} 470static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 471_lzcnt_u32(unsigned int a) { 472 if (!a) 473 return 32; 474 return __builtin_clzl(a); 475} 476static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) 477__popcnt16(unsigned short value) { 478 return __builtin_popcount((int)value); 479} 480static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 481__popcnt(unsigned int value) { 482 return __builtin_popcount(value); 483} 484static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 485_bittest(long const *a, long b) { 486 return (*a >> b) & 1; 487} 488static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 489_bittestandcomplement(long *a, long b) { 490 unsigned char x = (*a >> b) & 1; 491 *a = *a ^ (1 << b); 492 return x; 493} 494static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 495_bittestandreset(long *a, long b) { 496 unsigned char x = (*a >> b) & 1; 497 *a = *a & ~(1 << b); 498 return x; 499} 500static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 501_bittestandset(long *a, long b) { 502 unsigned char x = (*a >> b) & 1; 503 *a = *a | (1 << b); 504 return x; 505} 
506#ifdef __x86_64__ 507static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 508_BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask) { 509 if (!_Mask) 510 return 0; 511 *_Index = __builtin_ctzll(_Mask); 512 return 1; 513} 514static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 515_BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) { 516 if (!_Mask) 517 return 0; 518 *_Index = 63 - __builtin_clzll(_Mask); 519 return 1; 520} 521static 522__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) 523_lzcnt_u64(unsigned __int64 a) { 524 if (!a) 525 return 64; 526 return __builtin_clzll(a); 527} 528static __inline__ 529unsigned __int64 __attribute__((__always_inline__, __nodebug__)) 530 __popcnt64(unsigned __int64 value) { 531 return __builtin_popcountll(value); 532} 533static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 534_bittest64(__int64 const *a, __int64 b) { 535 return (*a >> b) & 1; 536} 537static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 538_bittestandcomplement64(__int64 *a, __int64 b) { 539 unsigned char x = (*a >> b) & 1; 540 *a = *a ^ (1ll << b); 541 return x; 542} 543static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 544_bittestandreset64(__int64 *a, __int64 b) { 545 unsigned char x = (*a >> b) & 1; 546 *a = *a & ~(1ll << b); 547 return x; 548} 549static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 550_bittestandset64(__int64 *a, __int64 b) { 551 unsigned char x = (*a >> b) & 1; 552 *a = *a | (1ll << b); 553 return x; 554} 555#endif 556/*----------------------------------------------------------------------------*\ 557|* Interlocked Exchange Add 558\*----------------------------------------------------------------------------*/ 559static __inline__ char __attribute__((__always_inline__, __nodebug__)) 560_InterlockedExchangeAdd8(char volatile 
*_Addend, char _Value) { 561 return __atomic_add_fetch(_Addend, _Value, 0) - _Value; 562} 563static __inline__ short __attribute__((__always_inline__, __nodebug__)) 564_InterlockedExchangeAdd16(short volatile *_Addend, short _Value) { 565 return __atomic_add_fetch(_Addend, _Value, 0) - _Value; 566} 567static __inline__ long __attribute__((__always_inline__, __nodebug__)) 568_InterlockedExchangeAdd(long volatile *_Addend, long _Value) { 569 return __atomic_add_fetch(_Addend, _Value, 0) - _Value; 570} 571#ifdef __x86_64__ 572static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 573_InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) { 574 return __atomic_add_fetch(_Addend, _Value, 0) - _Value; 575} 576#endif 577/*----------------------------------------------------------------------------*\ 578|* Interlocked Exchange Sub 579\*----------------------------------------------------------------------------*/ 580static __inline__ char __attribute__((__always_inline__, __nodebug__)) 581_InterlockedExchangeSub8(char volatile *_Subend, char _Value) { 582 return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; 583} 584static __inline__ short __attribute__((__always_inline__, __nodebug__)) 585_InterlockedExchangeSub16(short volatile *_Subend, short _Value) { 586 return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; 587} 588static __inline__ long __attribute__((__always_inline__, __nodebug__)) 589_InterlockedExchangeSub(long volatile *_Subend, long _Value) { 590 return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; 591} 592#ifdef __x86_64__ 593static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 594_InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) { 595 return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; 596} 597#endif 598/*----------------------------------------------------------------------------*\ 599|* Interlocked Increment 
600\*----------------------------------------------------------------------------*/ 601static __inline__ char __attribute__((__always_inline__, __nodebug__)) 602_InterlockedIncrement16(char volatile *_Value) { 603 return __atomic_add_fetch(_Value, 1, 0); 604} 605static __inline__ long __attribute__((__always_inline__, __nodebug__)) 606_InterlockedIncrement(long volatile *_Value) { 607 return __atomic_add_fetch(_Value, 1, 0); 608} 609#ifdef __x86_64__ 610static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 611_InterlockedIncrement64(__int64 volatile *_Value) { 612 return __atomic_add_fetch(_Value, 1, 0); 613} 614#endif 615/*----------------------------------------------------------------------------*\ 616|* Interlocked Decrement 617\*----------------------------------------------------------------------------*/ 618static __inline__ char __attribute__((__always_inline__, __nodebug__)) 619_InterlockedDecrement16(char volatile *_Value) { 620 return __atomic_sub_fetch(_Value, 1, 0); 621} 622static __inline__ long __attribute__((__always_inline__, __nodebug__)) 623_InterlockedDecrement(long volatile *_Value) { 624 return __atomic_sub_fetch(_Value, 1, 0); 625} 626#ifdef __x86_64__ 627static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 628_InterlockedDecrement64(__int64 volatile *_Value) { 629 return __atomic_sub_fetch(_Value, 1, 0); 630} 631#endif 632/*----------------------------------------------------------------------------*\ 633|* Interlocked And 634\*----------------------------------------------------------------------------*/ 635static __inline__ char __attribute__((__always_inline__, __nodebug__)) 636_InterlockedAnd8(char volatile *_Value, char _Mask) { 637 return __atomic_and_fetch(_Value, _Mask, 0); 638} 639static __inline__ short __attribute__((__always_inline__, __nodebug__)) 640_InterlockedAnd16(short volatile *_Value, short _Mask) { 641 return __atomic_and_fetch(_Value, _Mask, 0); 642} 643static __inline__ long 
__attribute__((__always_inline__, __nodebug__)) 644_InterlockedAnd(long volatile *_Value, long _Mask) { 645 return __atomic_and_fetch(_Value, _Mask, 0); 646} 647#ifdef __x86_64__ 648static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 649_InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) { 650 return __atomic_and_fetch(_Value, _Mask, 0); 651} 652#endif 653/*----------------------------------------------------------------------------*\ 654|* Interlocked Or 655\*----------------------------------------------------------------------------*/ 656static __inline__ char __attribute__((__always_inline__, __nodebug__)) 657_InterlockedOr8(char volatile *_Value, char _Mask) { 658 return __atomic_or_fetch(_Value, _Mask, 0); 659} 660static __inline__ short __attribute__((__always_inline__, __nodebug__)) 661_InterlockedOr16(short volatile *_Value, short _Mask) { 662 return __atomic_or_fetch(_Value, _Mask, 0); 663} 664static __inline__ long __attribute__((__always_inline__, __nodebug__)) 665_InterlockedOr(long volatile *_Value, long _Mask) { 666 return __atomic_or_fetch(_Value, _Mask, 0); 667} 668#ifdef __x86_64__ 669static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 670_InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) { 671 return __atomic_or_fetch(_Value, _Mask, 0); 672} 673#endif 674/*----------------------------------------------------------------------------*\ 675|* Interlocked Xor 676\*----------------------------------------------------------------------------*/ 677static __inline__ char __attribute__((__always_inline__, __nodebug__)) 678_InterlockedXor8(char volatile *_Value, char _Mask) { 679 return __atomic_xor_fetch(_Value, _Mask, 0); 680} 681static __inline__ short __attribute__((__always_inline__, __nodebug__)) 682_InterlockedXor16(short volatile *_Value, short _Mask) { 683 return __atomic_xor_fetch(_Value, _Mask, 0); 684} 685static __inline__ long __attribute__((__always_inline__, __nodebug__)) 
686_InterlockedXor(long volatile *_Value, long _Mask) { 687 return __atomic_xor_fetch(_Value, _Mask, 0); 688} 689#ifdef __x86_64__ 690static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 691_InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) { 692 return __atomic_xor_fetch(_Value, _Mask, 0); 693} 694#endif 695/*----------------------------------------------------------------------------*\ 696|* Interlocked Exchange 697\*----------------------------------------------------------------------------*/ 698static __inline__ char __attribute__((__always_inline__, __nodebug__)) 699_InterlockedExchange8(char volatile *_Target, char _Value) { 700 __atomic_exchange(_Target, &_Value, &_Value, 0); 701 return _Value; 702} 703static __inline__ short __attribute__((__always_inline__, __nodebug__)) 704_InterlockedExchange16(short volatile *_Target, short _Value) { 705 __atomic_exchange(_Target, &_Value, &_Value, 0); 706 return _Value; 707} 708static __inline__ long __attribute__((__always_inline__, __nodebug__)) 709_InterlockedExchange(long volatile *_Target, long _Value) { 710 __atomic_exchange(_Target, &_Value, &_Value, 0); 711 return _Value; 712} 713#ifdef __x86_64__ 714static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 715_InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) { 716 __atomic_exchange(_Target, &_Value, &_Value, 0); 717 return _Value; 718} 719#endif 720/*----------------------------------------------------------------------------*\ 721|* Interlocked Compare Exchange 722\*----------------------------------------------------------------------------*/ 723static __inline__ char __attribute__((__always_inline__, __nodebug__)) 724_InterlockedCompareExchange8(char volatile *_Destination, 725 char _Exchange, char _Comparand) { 726 __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); 727 return _Comparand; 728} 729static __inline__ short __attribute__((__always_inline__, __nodebug__)) 
730_InterlockedCompareExchange16(short volatile *_Destination, 731 short _Exchange, short _Comparand) { 732 __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); 733 return _Comparand; 734} 735static __inline__ long __attribute__((__always_inline__, __nodebug__)) 736_InterlockedCompareExchange(long volatile *_Destination, 737 long _Exchange, long _Comparand) { 738 __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); 739 return _Comparand; 740} 741#ifdef __x86_64__ 742static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 743_InterlockedCompareExchange64(__int64 volatile *_Destination, 744 __int64 _Exchange, __int64 _Comparand) { 745 __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); 746 return _Comparand; 747} 748#endif 749/*----------------------------------------------------------------------------*\ 750|* Barriers 751\*----------------------------------------------------------------------------*/ 752static __inline__ void __attribute__((__always_inline__, __nodebug__)) 753__attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) 754_ReadWriteBarrier(void) { 755 __asm__ volatile ("" : : : "memory"); 756} 757static __inline__ void __attribute__((__always_inline__, __nodebug__)) 758__attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) 759_ReadBarrier(void) { 760 __asm__ volatile ("" : : : "memory"); 761} 762static __inline__ void __attribute__((__always_inline__, __nodebug__)) 763__attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) 764_WriteBarrier(void) { 765 __asm__ volatile ("" : : : "memory"); 766} 767/*----------------------------------------------------------------------------*\ 768|* Misc 769\*----------------------------------------------------------------------------*/ 770static __inline__ void * __attribute__((__always_inline__, __nodebug__)) 771_AddressOfReturnAddress(void) { 772 return 
(void*)((char*)__builtin_frame_address(0) + sizeof(void*)); 773} 774static __inline__ void * __attribute__((__always_inline__, __nodebug__)) 775_ReturnAddress(void) { 776 return __builtin_return_address(0); 777} 778 779#ifdef __cplusplus 780} 781#endif 782 783#endif /* __INTRIN_H */ 784#endif /* _MSC_VER */ 785