1/*===---- bmiintrin.h - BMI intrinsics -------------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
11#error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
12#endif
13
14#ifndef __BMIINTRIN_H
15#define __BMIINTRIN_H
16
/* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT
   instruction behaves as BSF on non-BMI targets, there is code that expects
   to use it as a potentially faster version of BSF. For that reason these
   attributes only request inlining and suppress debug info; they name no
   target feature. */
#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
21
/* Single-underscore spelling; expands to a call of __tzcnt_u16. */
#define _tzcnt_u16(a)     (__tzcnt_u16((a)))
23
24/// Counts the number of trailing zero bits in the operand.
25///
26/// \headerfile <x86intrin.h>
27///
28/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
29///
30/// \param __X
31///    An unsigned 16-bit integer whose trailing zeros are to be counted.
32/// \returns An unsigned 16-bit integer containing the number of trailing zero
33///    bits in the operand.
34static __inline__ unsigned short __RELAXED_FN_ATTRS
35__tzcnt_u16(unsigned short __X)
36{
37  return __builtin_ia32_tzcnt_u16(__X);
38}
39
40/// Counts the number of trailing zero bits in the operand.
41///
42/// \headerfile <x86intrin.h>
43///
44/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
45///
46/// \param __X
47///    An unsigned 32-bit integer whose trailing zeros are to be counted.
48/// \returns An unsigned 32-bit integer containing the number of trailing zero
49///    bits in the operand.
50/// \see _mm_tzcnt_32
51static __inline__ unsigned int __RELAXED_FN_ATTRS
52__tzcnt_u32(unsigned int __X)
53{
54  return __builtin_ia32_tzcnt_u32(__X);
55}
56
57/// Counts the number of trailing zero bits in the operand.
58///
59/// \headerfile <x86intrin.h>
60///
61/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
62///
63/// \param __X
64///    An unsigned 32-bit integer whose trailing zeros are to be counted.
65/// \returns An 32-bit integer containing the number of trailing zero bits in
66///    the operand.
67/// \see __tzcnt_u32
68static __inline__ int __RELAXED_FN_ATTRS
69_mm_tzcnt_32(unsigned int __X)
70{
71  return (int)__builtin_ia32_tzcnt_u32(__X);
72}
73
/* Single-underscore spelling; expands to a call of __tzcnt_u32. */
#define _tzcnt_u32(a)     (__tzcnt_u32((a)))
75
76#ifdef __x86_64__
77
78/// Counts the number of trailing zero bits in the operand.
79///
80/// \headerfile <x86intrin.h>
81///
82/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
83///
84/// \param __X
85///    An unsigned 64-bit integer whose trailing zeros are to be counted.
86/// \returns An unsigned 64-bit integer containing the number of trailing zero
87///    bits in the operand.
88/// \see _mm_tzcnt_64
89static __inline__ unsigned long long __RELAXED_FN_ATTRS
90__tzcnt_u64(unsigned long long __X)
91{
92  return __builtin_ia32_tzcnt_u64(__X);
93}
94
95/// Counts the number of trailing zero bits in the operand.
96///
97/// \headerfile <x86intrin.h>
98///
99/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
100///
101/// \param __X
102///    An unsigned 64-bit integer whose trailing zeros are to be counted.
103/// \returns An 64-bit integer containing the number of trailing zero bits in
104///    the operand.
105/// \see __tzcnt_u64
106static __inline__ long long __RELAXED_FN_ATTRS
107_mm_tzcnt_64(unsigned long long __X)
108{
109  return (long long)__builtin_ia32_tzcnt_u64(__X);
110}
111
/* Single-underscore spelling; expands to a call of __tzcnt_u64. */
#define _tzcnt_u64(a)     (__tzcnt_u64((a)))
113
114#endif /* __x86_64__ */
115
116#undef __RELAXED_FN_ATTRS
117
118#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
119    defined(__BMI__)
120
/* Define the default attributes for the remaining functions in this file
   (everything other than the tzcnt intrinsics): always inline, no debug
   info, and the "bmi" target feature named for the function. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
123
/* Single-underscore spellings that expand directly to calls of the
   double-underscore functions with the same arguments. */
#define _andn_u32(a, b)   (__andn_u32((a), (b)))

/* There is no such alias for BEXTR: _bextr_u32 != __bextr_u32. _bextr_u32
   takes the start index and the length as two separate arguments, while
   __bextr_u32 takes them packed into one control operand, so _bextr_u32 is
   a function in its own right. */
#define _blsi_u32(a)      (__blsi_u32((a)))

#define _blsmsk_u32(a)    (__blsmsk_u32((a)))

#define _blsr_u32(a)      (__blsr_u32((a)))
132
133/// Performs a bitwise AND of the second operand with the one's
134///    complement of the first operand.
135///
136/// \headerfile <x86intrin.h>
137///
138/// This intrinsic corresponds to the <c> ANDN </c> instruction.
139///
140/// \param __X
141///    An unsigned integer containing one of the operands.
142/// \param __Y
143///    An unsigned integer containing one of the operands.
144/// \returns An unsigned integer containing the bitwise AND of the second
145///    operand with the one's complement of the first operand.
146static __inline__ unsigned int __DEFAULT_FN_ATTRS
147__andn_u32(unsigned int __X, unsigned int __Y)
148{
149  return ~__X & __Y;
150}
151
152/* AMD-specified, double-leading-underscore version of BEXTR */
153/// Extracts the specified bits from the first operand and returns them
154///    in the least significant bits of the result.
155///
156/// \headerfile <x86intrin.h>
157///
158/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
159///
160/// \param __X
161///    An unsigned integer whose bits are to be extracted.
162/// \param __Y
163///    An unsigned integer used to specify which bits are extracted. Bits [7:0]
164///    specify the index of the least significant bit. Bits [15:8] specify the
165///    number of bits to be extracted.
166/// \returns An unsigned integer whose least significant bits contain the
167///    extracted bits.
168/// \see _bextr_u32
169static __inline__ unsigned int __DEFAULT_FN_ATTRS
170__bextr_u32(unsigned int __X, unsigned int __Y)
171{
172  return __builtin_ia32_bextr_u32(__X, __Y);
173}
174
175/* Intel-specified, single-leading-underscore version of BEXTR */
176/// Extracts the specified bits from the first operand and returns them
177///    in the least significant bits of the result.
178///
179/// \headerfile <x86intrin.h>
180///
181/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
182///
183/// \param __X
184///    An unsigned integer whose bits are to be extracted.
185/// \param __Y
186///    An unsigned integer used to specify the index of the least significant
187///    bit for the bits to be extracted. Bits [7:0] specify the index.
188/// \param __Z
189///    An unsigned integer used to specify the number of bits to be extracted.
190///    Bits [7:0] specify the number of bits.
191/// \returns An unsigned integer whose least significant bits contain the
192///    extracted bits.
193/// \see __bextr_u32
194static __inline__ unsigned int __DEFAULT_FN_ATTRS
195_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
196{
197  return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
198}
199
200/* Intel-specified, single-leading-underscore version of BEXTR2 */
201/// Extracts the specified bits from the first operand and returns them
202///    in the least significant bits of the result.
203///
204/// \headerfile <x86intrin.h>
205///
206/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
207///
208/// \param __X
209///    An unsigned integer whose bits are to be extracted.
210/// \param __Y
211///    An unsigned integer used to specify which bits are extracted. Bits [7:0]
212///    specify the index of the least significant bit. Bits [15:8] specify the
213///    number of bits to be extracted.
214/// \returns An unsigned integer whose least significant bits contain the
215///    extracted bits.
216/// \see __bextr_u32
217static __inline__ unsigned int __DEFAULT_FN_ATTRS
218_bextr2_u32(unsigned int __X, unsigned int __Y) {
219  return __builtin_ia32_bextr_u32(__X, __Y);
220}
221
222/// Clears all bits in the source except for the least significant bit
223///    containing a value of 1 and returns the result.
224///
225/// \headerfile <x86intrin.h>
226///
227/// This intrinsic corresponds to the <c> BLSI </c> instruction.
228///
229/// \param __X
230///    An unsigned integer whose bits are to be cleared.
231/// \returns An unsigned integer containing the result of clearing the bits from
232///    the source operand.
233static __inline__ unsigned int __DEFAULT_FN_ATTRS
234__blsi_u32(unsigned int __X)
235{
236  return __X & -__X;
237}
238
239/// Creates a mask whose bits are set to 1, using bit 0 up to and
240///    including the least significant bit that is set to 1 in the source
241///    operand and returns the result.
242///
243/// \headerfile <x86intrin.h>
244///
245/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
246///
247/// \param __X
248///    An unsigned integer used to create the mask.
249/// \returns An unsigned integer containing the newly created mask.
250static __inline__ unsigned int __DEFAULT_FN_ATTRS
251__blsmsk_u32(unsigned int __X)
252{
253  return __X ^ (__X - 1);
254}
255
256/// Clears the least significant bit that is set to 1 in the source
257///    operand and returns the result.
258///
259/// \headerfile <x86intrin.h>
260///
261/// This intrinsic corresponds to the <c> BLSR </c> instruction.
262///
263/// \param __X
264///    An unsigned integer containing the operand to be cleared.
265/// \returns An unsigned integer containing the result of clearing the source
266///    operand.
267static __inline__ unsigned int __DEFAULT_FN_ATTRS
268__blsr_u32(unsigned int __X)
269{
270  return __X & (__X - 1);
271}
272
273#ifdef __x86_64__
274
/* Single-underscore spellings that expand directly to calls of the
   double-underscore functions with the same arguments. */
#define _andn_u64(a, b)   (__andn_u64((a), (b)))

/* There is no such alias for BEXTR: _bextr_u64 != __bextr_u64. _bextr_u64
   takes the start index and the length as two separate arguments, while
   __bextr_u64 takes them packed into one control operand, so _bextr_u64 is
   a function in its own right. */
#define _blsi_u64(a)      (__blsi_u64((a)))

#define _blsmsk_u64(a)    (__blsmsk_u64((a)))

#define _blsr_u64(a)      (__blsr_u64((a)))
283
284/// Performs a bitwise AND of the second operand with the one's
285///    complement of the first operand.
286///
287/// \headerfile <x86intrin.h>
288///
289/// This intrinsic corresponds to the <c> ANDN </c> instruction.
290///
291/// \param __X
292///    An unsigned 64-bit integer containing one of the operands.
293/// \param __Y
294///    An unsigned 64-bit integer containing one of the operands.
295/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
296///    operand with the one's complement of the first operand.
297static __inline__ unsigned long long __DEFAULT_FN_ATTRS
298__andn_u64 (unsigned long long __X, unsigned long long __Y)
299{
300  return ~__X & __Y;
301}
302
303/* AMD-specified, double-leading-underscore version of BEXTR */
304/// Extracts the specified bits from the first operand and returns them
305///    in the least significant bits of the result.
306///
307/// \headerfile <x86intrin.h>
308///
309/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
310///
311/// \param __X
312///    An unsigned 64-bit integer whose bits are to be extracted.
313/// \param __Y
314///    An unsigned 64-bit integer used to specify which bits are extracted. Bits
315///    [7:0] specify the index of the least significant bit. Bits [15:8] specify
316///    the number of bits to be extracted.
317/// \returns An unsigned 64-bit integer whose least significant bits contain the
318///    extracted bits.
319/// \see _bextr_u64
320static __inline__ unsigned long long __DEFAULT_FN_ATTRS
321__bextr_u64(unsigned long long __X, unsigned long long __Y)
322{
323  return __builtin_ia32_bextr_u64(__X, __Y);
324}
325
326/* Intel-specified, single-leading-underscore version of BEXTR */
327/// Extracts the specified bits from the first operand and returns them
328///     in the least significant bits of the result.
329///
330/// \headerfile <x86intrin.h>
331///
332/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
333///
334/// \param __X
335///    An unsigned 64-bit integer whose bits are to be extracted.
336/// \param __Y
337///    An unsigned integer used to specify the index of the least significant
338///    bit for the bits to be extracted. Bits [7:0] specify the index.
339/// \param __Z
340///    An unsigned integer used to specify the number of bits to be extracted.
341///    Bits [7:0] specify the number of bits.
342/// \returns An unsigned 64-bit integer whose least significant bits contain the
343///    extracted bits.
344/// \see __bextr_u64
345static __inline__ unsigned long long __DEFAULT_FN_ATTRS
346_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
347{
348  return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
349}
350
351/* Intel-specified, single-leading-underscore version of BEXTR2 */
352/// Extracts the specified bits from the first operand and returns them
353///    in the least significant bits of the result.
354///
355/// \headerfile <x86intrin.h>
356///
357/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
358///
359/// \param __X
360///    An unsigned 64-bit integer whose bits are to be extracted.
361/// \param __Y
362///    An unsigned 64-bit integer used to specify which bits are extracted. Bits
363///    [7:0] specify the index of the least significant bit. Bits [15:8] specify
364///    the number of bits to be extracted.
365/// \returns An unsigned 64-bit integer whose least significant bits contain the
366///    extracted bits.
367/// \see __bextr_u64
368static __inline__ unsigned long long __DEFAULT_FN_ATTRS
369_bextr2_u64(unsigned long long __X, unsigned long long __Y) {
370  return __builtin_ia32_bextr_u64(__X, __Y);
371}
372
373/// Clears all bits in the source except for the least significant bit
374///    containing a value of 1 and returns the result.
375///
376/// \headerfile <x86intrin.h>
377///
378/// This intrinsic corresponds to the <c> BLSI </c> instruction.
379///
380/// \param __X
381///    An unsigned 64-bit integer whose bits are to be cleared.
382/// \returns An unsigned 64-bit integer containing the result of clearing the
383///    bits from the source operand.
384static __inline__ unsigned long long __DEFAULT_FN_ATTRS
385__blsi_u64(unsigned long long __X)
386{
387  return __X & -__X;
388}
389
390/// Creates a mask whose bits are set to 1, using bit 0 up to and
391///    including the least significant bit that is set to 1 in the source
392///    operand and returns the result.
393///
394/// \headerfile <x86intrin.h>
395///
396/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
397///
398/// \param __X
399///    An unsigned 64-bit integer used to create the mask.
400/// \returns An unsigned 64-bit integer containing the newly created mask.
401static __inline__ unsigned long long __DEFAULT_FN_ATTRS
402__blsmsk_u64(unsigned long long __X)
403{
404  return __X ^ (__X - 1);
405}
406
407/// Clears the least significant bit that is set to 1 in the source
408///    operand and returns the result.
409///
410/// \headerfile <x86intrin.h>
411///
412/// This intrinsic corresponds to the <c> BLSR </c> instruction.
413///
414/// \param __X
415///    An unsigned 64-bit integer containing the operand to be cleared.
416/// \returns An unsigned 64-bit integer containing the result of clearing the
417///    source operand.
418static __inline__ unsigned long long __DEFAULT_FN_ATTRS
419__blsr_u64(unsigned long long __X)
420{
421  return __X & (__X - 1);
422}
423
424#endif /* __x86_64__ */
425
426#undef __DEFAULT_FN_ATTRS
427
428#endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules)   \
429          || defined(__BMI__) */
430
431#endif /* __BMIINTRIN_H */
432