/* ===-------- Intrin.h ---------------------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

/* Only include this if we're compiling for the Windows platform. */
#ifndef _MSC_VER
#include_next <Intrin.h>
#else

#ifndef __INTRIN_H
#define __INTRIN_H

/* First include the standard intrinsics. */
#include <x86intrin.h>

#ifdef __cplusplus
extern "C" {
#endif

/* And the random ones that aren't in those files. */
__m64 _m_from_float(float);
__m64 _m_from_int(int _l);
void _m_prefetch(void *);
float _m_to_float(__m64);
int _m_to_int(__m64 _M);
/* Other assorted instruction intrinsics. */
void __addfsbyte(unsigned long, unsigned char);
void __addfsdword(unsigned long, unsigned long);
void __addfsword(unsigned long, unsigned short);
void __code_seg(const char *);
void __cpuid(int[4], int);
void __cpuidex(int[4], int, int);
void __debugbreak(void);
__int64 __emul(int, int);
unsigned __int64 __emulu(unsigned int, unsigned int);
void __cdecl __fastfail(unsigned int);
unsigned int __getcallerseflags(void);
void __halt(void);
unsigned char __inbyte(unsigned short);
void __inbytestring(unsigned short, unsigned char *, unsigned long);
void __incfsbyte(unsigned long);
void __incfsdword(unsigned long);
void __incfsword(unsigned long);
unsigned long __indword(unsigned short);
void __indwordstring(unsigned short, unsigned long *, unsigned long);
void __int2c(void);
void __invlpg(void *);
unsigned short __inword(unsigned short);
void __inwordstring(unsigned short, unsigned short *, unsigned long);
void __lidt(void *);
unsigned __int64 __ll_lshift(unsigned __int64, int);
__int64 __ll_rshift(__int64, int);
void __llwpcb(void *);
unsigned char __lwpins32(unsigned int, unsigned int, unsigned int);
void __lwpval32(unsigned int, unsigned int, unsigned int);
unsigned int __lzcnt(unsigned int);
unsigned short __lzcnt16(unsigned short);
void __movsb(unsigned char *, unsigned char const *, size_t);
void __movsd(unsigned long *, unsigned long const *, size_t);
void __movsw(unsigned short *, unsigned short const *, size_t);
void __nop(void);
void __nvreg_restore_fence(void);
void __nvreg_save_fence(void);
void __outbyte(unsigned short, unsigned char);
void __outbytestring(unsigned short, unsigned char *, unsigned long);
void __outdword(unsigned short, unsigned long);
void __outdwordstring(unsigned short, unsigned long *, unsigned long);
void __outword(unsigned short, unsigned short);
void __outwordstring(unsigned short, unsigned short *, unsigned long);
static __inline__
unsigned int __popcnt(unsigned int);
static __inline__
unsigned short __popcnt16(unsigned short);
unsigned __int64 __rdtsc(void);
unsigned __int64 __rdtscp(unsigned int *);
unsigned long __readcr0(void);
unsigned long __readcr2(void);
unsigned long __readcr3(void);
unsigned long __readcr4(void);
unsigned long __readcr8(void);
unsigned int __readdr(unsigned int);
unsigned int __readeflags(void);
unsigned char __readfsbyte(unsigned long);
unsigned long __readfsdword(unsigned long);
unsigned __int64 __readfsqword(unsigned long);
unsigned short __readfsword(unsigned long);
unsigned __int64 __readmsr(unsigned long);
unsigned __int64 __readpmc(unsigned long);
unsigned long __segmentlimit(unsigned long);
void __sidt(void *);
void *__slwpcb(void);
void __stosb(unsigned char *, unsigned char, size_t);
void __stosd(unsigned long *, unsigned long, size_t);
void __stosw(unsigned short *, unsigned short, size_t);
void __svm_clgi(void);
void __svm_invlpga(void *, int);
void __svm_skinit(int);
void __svm_stgi(void);
void __svm_vmload(size_t);
void __svm_vmrun(size_t);
void __svm_vmsave(size_t);
void __ud2(void);
unsigned __int64 __ull_rshift(unsigned __int64, int);
void __vmx_off(void);
void __vmx_vmptrst(unsigned __int64 *);
void __wbinvd(void);
void __writecr0(unsigned int);
void __writecr3(unsigned int);
void __writecr4(unsigned int);
void __writecr8(unsigned int);
void __writedr(unsigned int, unsigned int);
void __writeeflags(unsigned int);
void __writefsbyte(unsigned long, unsigned char);
void __writefsdword(unsigned long, unsigned long);
void __writefsqword(unsigned long, unsigned __int64);
void __writefsword(unsigned long, unsigned short);
void __writemsr(unsigned long, unsigned __int64);
static __inline__
void *_AddressOfReturnAddress(void);
unsigned int _andn_u32(unsigned int, unsigned int);
unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int);
unsigned int _bextri_u32(unsigned int, unsigned int);
static __inline__
unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
static __inline__
unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
static __inline__
unsigned char _bittest(long const *, long);
static __inline__
unsigned char _bittestandcomplement(long *, long);
static __inline__
unsigned char _bittestandreset(long *, long);
static __inline__
unsigned char _bittestandset(long *, long);
unsigned int _blcfill_u32(unsigned int);
unsigned int _blci_u32(unsigned int);
unsigned int _blcic_u32(unsigned int);
unsigned int _blcmsk_u32(unsigned int);
unsigned int _blcs_u32(unsigned int);
unsigned int _blsfill_u32(unsigned int);
unsigned int _blsi_u32(unsigned int);
unsigned int _blsic_u32(unsigned int);
unsigned int _blsmsk_u32(unsigned int);
unsigned int _blsr_u32(unsigned int);
unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64);
unsigned long __cdecl _byteswap_ulong(unsigned long);
unsigned short __cdecl _byteswap_ushort(unsigned short);
unsigned int _bzhi_u32(unsigned int, unsigned int);
void __cdecl _disable(void);
void __cdecl _enable(void);
void __cdecl _fxrstor(void const *);
void __cdecl _fxsave(void *);
long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value);
static __inline__
long _InterlockedAnd(long volatile *_Value, long _Mask);
static __inline__
short _InterlockedAnd16(short volatile *_Value, short _Mask);
static __inline__
char _InterlockedAnd8(char volatile *_Value, char _Mask);
unsigned char _interlockedbittestandreset(long volatile *, long);
unsigned char _interlockedbittestandset(long volatile *, long);
static __inline__
long __cdecl _InterlockedCompareExchange(long volatile *_Destination,
                                         long _Exchange, long _Comparand);
long _InterlockedCompareExchange_HLEAcquire(long volatile *, long, long);
long _InterlockedCompareExchange_HLERelease(long volatile *, long, long);
static __inline__
short _InterlockedCompareExchange16(short volatile *_Destination,
                                    short _Exchange, short _Comparand);
static __inline__
__int64 _InterlockedCompareExchange64(__int64 volatile *_Destination,
                                      __int64 _Exchange, __int64 _Comparand);
__int64 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *, __int64,
                                                 __int64);
__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64,
                                                 __int64);
static __inline__
char _InterlockedCompareExchange8(char volatile *_Destination, char _Exchange,
                                  char _Comparand);
void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *,
                                                    void *);
void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *,
                                                    void *);
static __inline__
long __cdecl _InterlockedDecrement(long volatile *_Addend);
static __inline__
short _InterlockedDecrement16(short volatile *_Addend);
static __inline__
long __cdecl _InterlockedExchange(long volatile *_Target, long _Value);
static __inline__
short _InterlockedExchange16(short volatile *_Target, short _Value);
static __inline__
char _InterlockedExchange8(char volatile *_Target, char _Value);
static __inline__
long __cdecl _InterlockedExchangeAdd(long volatile *_Addend, long _Value);
long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long);
long _InterlockedExchangeAdd_HLERelease(long volatile *, long);
static __inline__
char _InterlockedExchangeAdd8(char volatile *_Addend, char _Value);
static __inline__
long __cdecl _InterlockedIncrement(long volatile *_Addend);
static __inline__
short _InterlockedIncrement16(short volatile *_Addend);
static __inline__
long _InterlockedOr(long volatile *_Value, long _Mask);
static __inline__
short _InterlockedOr16(short volatile *_Value, short _Mask);
static __inline__
char _InterlockedOr8(char volatile *_Value, char _Mask);
static __inline__
long _InterlockedXor(long volatile *_Value, long _Mask);
static __inline__
short _InterlockedXor16(short volatile *_Value, short _Mask);
static __inline__
char _InterlockedXor8(char volatile *_Value, char _Mask);
void __cdecl _invpcid(unsigned int, void *);
static __inline__
unsigned long __cdecl _lrotl(unsigned long, int);
static __inline__
unsigned long __cdecl _lrotr(unsigned long, int);
static __inline__
unsigned int _lzcnt_u32(unsigned int);
static __inline__
void _ReadBarrier(void);
static __inline__
void _ReadWriteBarrier(void);
static __inline__
void *_ReturnAddress(void);
unsigned int _rorx_u32(unsigned int, const unsigned int);
int __cdecl _rdrand16_step(unsigned short *);
int __cdecl _rdrand32_step(unsigned int *);
static __inline__
unsigned int __cdecl _rotl(unsigned int _Value, int _Shift);
static __inline__
unsigned short _rotl16(unsigned short _Value, unsigned char _Shift);
static __inline__
unsigned __int64 __cdecl _rotl64(unsigned __int64 _Value, int _Shift);
static __inline__
unsigned char _rotl8(unsigned char _Value, unsigned char _Shift);
static __inline__
unsigned int __cdecl _rotr(unsigned int _Value, int _Shift);
static __inline__
unsigned short _rotr16(unsigned short _Value, unsigned char _Shift);
static __inline__
unsigned __int64 __cdecl _rotr64(unsigned __int64 _Value, int _Shift);
static __inline__
unsigned char _rotr8(unsigned char _Value, unsigned char _Shift);
int _sarx_i32(int, unsigned int);

/* FIXME: Need definition for jmp_buf.
   int __cdecl _setjmp(jmp_buf); */

unsigned int _shlx_u32(unsigned int, unsigned int);
unsigned int _shrx_u32(unsigned int, unsigned int);
void _Store_HLERelease(long volatile *, long);
void _Store64_HLERelease(__int64 volatile *, __int64);
void _StorePointer_HLERelease(void *volatile *, void *);
unsigned int _t1mskc_u32(unsigned int);
unsigned int _tzcnt_u32(unsigned int);
unsigned int _tzmsk_u32(unsigned int);
static __inline__
void _WriteBarrier(void);
void _xabort(const unsigned int imm);
unsigned __int32 _xbegin(void);
void _xend(void);
unsigned __int64 __cdecl _xgetbv(unsigned int);
void __cdecl _xrstor(void const *, unsigned __int64);
void __cdecl _xsave(void *, unsigned __int64);
void __cdecl _xsaveopt(void *, unsigned __int64);
void __cdecl _xsetbv(unsigned int, unsigned __int64);
unsigned char _xtest(void);

/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */
#ifdef __x86_64__
void __addgsbyte(unsigned long, unsigned char);
void __addgsdword(unsigned long, unsigned long);
void __addgsqword(unsigned long, unsigned __int64);
void __addgsword(unsigned long, unsigned short);
void __faststorefence(void);
void __incgsbyte(unsigned long);
void __incgsdword(unsigned long);
void __incgsqword(unsigned long);
void __incgsword(unsigned long);
unsigned __int64 __popcnt64(unsigned __int64);
unsigned __int64 __shiftleft128(unsigned __int64 _LowPart,
                                unsigned __int64 _HighPart,
                                unsigned char _Shift);
unsigned __int64 __shiftright128(unsigned __int64 _LowPart,
                                 unsigned __int64 _HighPart,
                                 unsigned char _Shift);
void __stosq(unsigned __int64 *, unsigned __int64, size_t);
unsigned __int64 _andn_u64(unsigned __int64, unsigned __int64);
unsigned __int64 _bextr_u64(unsigned __int64, unsigned int, unsigned int);
unsigned __int64 _bextri_u64(unsigned __int64, unsigned int);
static __inline__
unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
static __inline__
unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
static __inline__
unsigned char _bittest64(__int64 const *, __int64);
static __inline__
unsigned char _bittestandcomplement64(__int64 *, __int64);
static __inline__
unsigned char _bittestandreset64(__int64 *, __int64);
static __inline__
unsigned char _bittestandset64(__int64 *, __int64);
unsigned __int64 _blcfill_u64(unsigned __int64);
unsigned __int64 _blci_u64(unsigned __int64);
unsigned __int64 _blcic_u64(unsigned __int64);
unsigned __int64 _blcmsk_u64(unsigned __int64);
unsigned __int64 _blcs_u64(unsigned __int64);
unsigned __int64 _blsfill_u64(unsigned __int64);
unsigned __int64 _blsi_u64(unsigned __int64);
unsigned __int64 _blsic_u64(unsigned __int64);
unsigned __int64 _blsmsk_u64(unsigned __int64);
unsigned __int64 _blsr_u64(unsigned __int64);
unsigned __int64 _bzhi_u64(unsigned __int64, unsigned int);
void __cdecl _fxrstor64(void const *);
void __cdecl _fxsave64(void *);
long _InterlockedAnd_np(long volatile *_Value, long _Mask);
short _InterlockedAnd16_np(short volatile *_Value, short _Mask);
__int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask);
char _InterlockedAnd8_np(char volatile *_Value, char _Mask);
unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64);
unsigned char _interlockedbittestandset64(__int64 volatile *, __int64);
long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange,
                                    long _Comparand);
unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,
                                             __int64 _ExchangeHigh,
                                             __int64 _ExchangeLow,
                                             __int64 *_ComparandResult);
unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination,
                                                __int64 _ExchangeHigh,
                                                __int64 _ExchangeLow,
                                                __int64 *_ComparandResult);
short _InterlockedCompareExchange16_np(short volatile *_Destination,
                                       short _Exchange, short _Comparand);
__int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination,
                                         __int64 _Exchange, __int64 _Comparand);
void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination,
                                            void *_Exchange, void *_Comparand);
long _InterlockedOr_np(long volatile *_Value, long _Mask);
short _InterlockedOr16_np(short volatile *_Value, short _Mask);
__int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask);
char _InterlockedOr8_np(char volatile *_Value, char _Mask);
long _InterlockedXor_np(long volatile *_Value, long _Mask);
short _InterlockedXor16_np(short volatile *_Value, short _Mask);
__int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask);
char _InterlockedXor8_np(char volatile *_Value, char _Mask);
unsigned __int64 _lzcnt_u64(unsigned __int64);
__int64 _mul128(__int64 _Multiplier, __int64 _Multiplicand,
                __int64 *_HighProduct);
unsigned int __cdecl _readfsbase_u32(void);
unsigned __int64 __cdecl _readfsbase_u64(void);
unsigned int __cdecl _readgsbase_u32(void);
unsigned __int64 __cdecl _readgsbase_u64(void);
unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int);
unsigned __int64 _tzcnt_u64(unsigned __int64);
unsigned __int64 _tzmsk_u64(unsigned __int64);
unsigned __int64 _umul128(unsigned __int64 _Multiplier,
                          unsigned __int64 _Multiplicand,
                          unsigned __int64 *_HighProduct);
void __cdecl _writefsbase_u32(unsigned int);
void __cdecl _writefsbase_u64(unsigned __int64);
void __cdecl _writegsbase_u32(unsigned int);
void __cdecl _writegsbase_u64(unsigned __int64);
void __cdecl _xrstor64(void const *, unsigned __int64);
void __cdecl _xsave64(void *, unsigned __int64);
void __cdecl _xsaveopt64(void *, unsigned __int64);

#endif /* __x86_64__ */

/*----------------------------------------------------------------------------*\
|* Bit Twiddling
\*----------------------------------------------------------------------------*/
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_rotl8(unsigned char _Value, unsigned char _Shift) {
  _Shift &= 0x7;
  return _Shift ? (_Value << _Shift) | (_Value >> (8 - _Shift)) : _Value;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_rotr8(unsigned char _Value, unsigned char _Shift) {
  _Shift &= 0x7;
  return _Shift ? (_Value >> _Shift) | (_Value << (8 - _Shift)) : _Value;
}
static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
_rotl16(unsigned short _Value, unsigned char _Shift) {
  _Shift &= 0xf;
  return _Shift ? (_Value << _Shift) | (_Value >> (16 - _Shift)) : _Value;
}
static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
_rotr16(unsigned short _Value, unsigned char _Shift) {
  _Shift &= 0xf;
  return _Shift ? (_Value >> _Shift) | (_Value << (16 - _Shift)) : _Value;
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_rotl(unsigned int _Value, int _Shift) {
  _Shift &= 0x1f;
  return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value;
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_rotr(unsigned int _Value, int _Shift) {
  _Shift &= 0x1f;
  return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value;
}
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
_lrotl(unsigned long _Value, int _Shift) {
  _Shift &= 0x1f;
  return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value;
}
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
_lrotr(unsigned long _Value, int _Shift) {
  _Shift &= 0x1f;
  return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value;
}
static
__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
_rotl64(unsigned __int64 _Value, int _Shift) {
  _Shift &= 0x3f;
  return _Shift ? (_Value << _Shift) | (_Value >> (64 - _Shift)) : _Value;
}
static
__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
_rotr64(unsigned __int64 _Value, int _Shift) {
  _Shift &= 0x3f;
  return _Shift ? (_Value >> _Shift) | (_Value << (64 - _Shift)) : _Value;
}
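/* Illustrative only (not part of the header): the rotate helpers above mask
 * the shift count to the operand width, so a rotate by the full width is a
 * no-op. For example, under these definitions:
 *
 *   _rotl8(0x81, 1)      // == 0x03: the high bit wraps around to bit 0
 *   _rotr16(0x0001, 4)   // == 0x1000
 */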
/*----------------------------------------------------------------------------*\
|* Bit Counting and Testing
\*----------------------------------------------------------------------------*/
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_BitScanForward(unsigned long *_Index, unsigned long _Mask) {
  if (!_Mask)
    return 0;
  *_Index = __builtin_ctzl(_Mask);
  return 1;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_BitScanReverse(unsigned long *_Index, unsigned long _Mask) {
  if (!_Mask)
    return 0;
  *_Index = 31 - __builtin_clzl(_Mask);
  return 1;
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_lzcnt_u32(unsigned int a) {
  if (!a)
    return 32;
  return __builtin_clz(a);
}
static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
__popcnt16(unsigned short value) {
  return __builtin_popcount(value);
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__popcnt(unsigned int value) {
  return __builtin_popcount(value);
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittest(long const *a, long b) {
  return (*a >> b) & 1;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittestandcomplement(long *a, long b) {
  unsigned char x = (*a >> b) & 1;
  *a = *a ^ (1 << b);
  return x;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittestandreset(long *a, long b) {
  unsigned char x = (*a >> b) & 1;
  *a = *a & ~(1 << b);
  return x;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittestandset(long *a, long b) {
  unsigned char x = (*a >> b) & 1;
  *a = *a | (1 << b);
  return x;
}
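/* Usage sketch (illustrative only): the _BitScan* functions report success
 * through the return value, so check it before reading the index.
 *
 *   unsigned long _Idx;
 *   if (_BitScanForward(&_Idx, 0x50))
 *     ;  // _Idx == 4, the position of the lowest set bit of 0x50
 */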
#ifdef __x86_64__
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask) {
  if (!_Mask)
    return 0;
  *_Index = __builtin_ctzll(_Mask);
  return 1;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) {
  if (!_Mask)
    return 0;
  *_Index = 63 - __builtin_clzll(_Mask);
  return 1;
}
static
__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
_lzcnt_u64(unsigned __int64 a) {
  if (!a)
    return 64;
  return __builtin_clzll(a);
}
static __inline__
unsigned __int64 __attribute__((__always_inline__, __nodebug__))
__popcnt64(unsigned __int64 value) {
  return __builtin_popcountll(value);
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittest64(__int64 const *a, __int64 b) {
  return (*a >> b) & 1;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittestandcomplement64(__int64 *a, __int64 b) {
  unsigned char x = (*a >> b) & 1;
  *a = *a ^ (1ll << b);
  return x;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittestandreset64(__int64 *a, __int64 b) {
  unsigned char x = (*a >> b) & 1;
  *a = *a & ~(1ll << b);
  return x;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittestandset64(__int64 *a, __int64 b) {
  unsigned char x = (*a >> b) & 1;
  *a = *a | (1ll << b);
  return x;
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange Add
\*----------------------------------------------------------------------------*/
/* The Interlocked* operations are full memory barriers and return the value
   the operand held before the update, hence the sequentially consistent
   fetch-then-add forms. */
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeAdd8(char volatile *_Addend, char _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST);
}
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeAdd16(short volatile *_Addend, short _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST);
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeAdd(long volatile *_Addend, long _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST);
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST);
}
#endif
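/* Illustrative only: per the Interlocked contract these return the value the
 * addend held before the addition, which is what __atomic_fetch_add provides.
 *
 *   long _Counter = 5;
 *   long _Old = _InterlockedExchangeAdd(&_Counter, 3);  // _Old == 5, _Counter == 8
 */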
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange Sub
\*----------------------------------------------------------------------------*/
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeSub8(char volatile *_Subend, char _Value) {
  return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST);
}
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeSub16(short volatile *_Subend, short _Value) {
  return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST);
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeSub(long volatile *_Subend, long _Value) {
  return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST);
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) {
  return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST);
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Increment
\*----------------------------------------------------------------------------*/
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedIncrement16(short volatile *_Value) {
  return __atomic_add_fetch(_Value, 1, __ATOMIC_SEQ_CST);
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedIncrement(long volatile *_Value) {
  return __atomic_add_fetch(_Value, 1, __ATOMIC_SEQ_CST);
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedIncrement64(__int64 volatile *_Value) {
  return __atomic_add_fetch(_Value, 1, __ATOMIC_SEQ_CST);
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Decrement
\*----------------------------------------------------------------------------*/
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedDecrement16(short volatile *_Value) {
  return __atomic_sub_fetch(_Value, 1, __ATOMIC_SEQ_CST);
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedDecrement(long volatile *_Value) {
  return __atomic_sub_fetch(_Value, 1, __ATOMIC_SEQ_CST);
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedDecrement64(__int64 volatile *_Value) {
  return __atomic_sub_fetch(_Value, 1, __ATOMIC_SEQ_CST);
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked And
\*----------------------------------------------------------------------------*/
/* _InterlockedAnd/Or/Xor return the original value, so use the fetch-then-op
   forms rather than op-then-fetch. */
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedAnd8(char volatile *_Value, char _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedAnd16(short volatile *_Value, short _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedAnd(long volatile *_Value, long _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Or
\*----------------------------------------------------------------------------*/
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedOr8(char volatile *_Value, char _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedOr16(short volatile *_Value, short _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedOr(long volatile *_Value, long _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Xor
\*----------------------------------------------------------------------------*/
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedXor8(char volatile *_Value, char _Mask) {
  return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedXor16(short volatile *_Value, short _Mask) {
  return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedXor(long volatile *_Value, long _Mask) {
  return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) {
  return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange
\*----------------------------------------------------------------------------*/
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedExchange8(char volatile *_Target, char _Value) {
  __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_SEQ_CST);
  return _Value;
}
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedExchange16(short volatile *_Target, short _Value) {
  __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_SEQ_CST);
  return _Value;
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedExchange(long volatile *_Target, long _Value) {
  __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_SEQ_CST);
  return _Value;
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) {
  __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_SEQ_CST);
  return _Value;
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Compare Exchange
\*----------------------------------------------------------------------------*/
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedCompareExchange8(char volatile *_Destination,
                             char _Exchange, char _Comparand) {
  __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return _Comparand;
}
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedCompareExchange16(short volatile *_Destination,
                              short _Exchange, short _Comparand) {
  __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return _Comparand;
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedCompareExchange(long volatile *_Destination,
                            long _Exchange, long _Comparand) {
  __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return _Comparand;
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedCompareExchange64(__int64 volatile *_Destination,
                              __int64 _Exchange, __int64 _Comparand) {
  __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return _Comparand;
}
#endif
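/* Usage sketch (illustrative only; _Flags is a hypothetical long volatile *):
 * the exchange succeeded iff the returned value equals _Comparand, giving the
 * usual retry loop:
 *
 *   long _Old, _New;
 *   do {
 *     _Old = *_Flags;
 *     _New = _Old | 0x1;
 *   } while (_InterlockedCompareExchange(_Flags, _New, _Old) != _Old);
 */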
/*----------------------------------------------------------------------------*\
|* Barriers
\*----------------------------------------------------------------------------*/
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__attribute__((deprecated("use other intrinsics or C++11 atomics instead")))
_ReadWriteBarrier(void) {
  __asm__ volatile ("" : : : "memory");
}
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__attribute__((deprecated("use other intrinsics or C++11 atomics instead")))
_ReadBarrier(void) {
  __asm__ volatile ("" : : : "memory");
}
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__attribute__((deprecated("use other intrinsics or C++11 atomics instead")))
_WriteBarrier(void) {
  __asm__ volatile ("" : : : "memory");
}
/*----------------------------------------------------------------------------*\
|* Misc
\*----------------------------------------------------------------------------*/
static __inline__ void * __attribute__((__always_inline__, __nodebug__))
_AddressOfReturnAddress(void) {
  return (void*)((char*)__builtin_frame_address(0) + sizeof(void*));
}
static __inline__ void * __attribute__((__always_inline__, __nodebug__))
_ReturnAddress(void) {
  return __builtin_return_address(0);
}
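/* Illustrative only: _ReturnAddress yields the caller's resume address, e.g.
 * for lightweight call-site logging in a hypothetical helper:
 *
 *   void _LogCaller(void) {
 *     printf("called from %p\n", _ReturnAddress());  // needs <stdio.h>
 *   }
 */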

#ifdef __cplusplus
}
#endif

#endif /* __INTRIN_H */
#endif /* _MSC_VER */