1/*===---- bmiintrin.h - BMI intrinsics -------------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
11#error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
12#endif
13
14#ifndef __BMIINTRIN_H
15#define __BMIINTRIN_H
16
/* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT
   instruction behaves as BSF on non-BMI targets, there is code that expects
   to use it as a potentially faster version of BSF.

   For that reason this attribute set deliberately carries no __target__
   attribute; it only forces inlining and suppresses debug info. */
#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))

/* Single-underscore alias that forwards its argument to __tzcnt_u16. */
#define _tzcnt_u16(a)     (__tzcnt_u16((a)))
23
24/// Counts the number of trailing zero bits in the operand.
25///
26/// \headerfile <x86intrin.h>
27///
28/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
29///
30/// \param __X
31///    An unsigned 16-bit integer whose trailing zeros are to be counted.
32/// \returns An unsigned 16-bit integer containing the number of trailing zero
33///    bits in the operand.
34static __inline__ unsigned short __RELAXED_FN_ATTRS
35__tzcnt_u16(unsigned short __X)
36{
37  return __builtin_ia32_tzcnt_u16(__X);
38}
39
40/// Counts the number of trailing zero bits in the operand.
41///
42/// \headerfile <x86intrin.h>
43///
44/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
45///
46/// \param __X
47///    An unsigned 32-bit integer whose trailing zeros are to be counted.
48/// \returns An unsigned 32-bit integer containing the number of trailing zero
49///    bits in the operand.
50static __inline__ unsigned int __RELAXED_FN_ATTRS
51__tzcnt_u32(unsigned int __X)
52{
53  return __builtin_ia32_tzcnt_u32(__X);
54}
55
56/// Counts the number of trailing zero bits in the operand.
57///
58/// \headerfile <x86intrin.h>
59///
60/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
61///
62/// \param __X
63///    An unsigned 32-bit integer whose trailing zeros are to be counted.
64/// \returns An 32-bit integer containing the number of trailing zero bits in
65///    the operand.
66static __inline__ int __RELAXED_FN_ATTRS
67_mm_tzcnt_32(unsigned int __X)
68{
69  return __builtin_ia32_tzcnt_u32(__X);
70}
71
/* Single-underscore alias that forwards its argument to __tzcnt_u32. */
#define _tzcnt_u32(a)     (__tzcnt_u32((a)))
73
74#ifdef __x86_64__
75
76/// Counts the number of trailing zero bits in the operand.
77///
78/// \headerfile <x86intrin.h>
79///
80/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
81///
82/// \param __X
83///    An unsigned 64-bit integer whose trailing zeros are to be counted.
84/// \returns An unsigned 64-bit integer containing the number of trailing zero
85///    bits in the operand.
86static __inline__ unsigned long long __RELAXED_FN_ATTRS
87__tzcnt_u64(unsigned long long __X)
88{
89  return __builtin_ia32_tzcnt_u64(__X);
90}
91
92/// Counts the number of trailing zero bits in the operand.
93///
94/// \headerfile <x86intrin.h>
95///
96/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
97///
98/// \param __X
99///    An unsigned 64-bit integer whose trailing zeros are to be counted.
100/// \returns An 64-bit integer containing the number of trailing zero bits in
101///    the operand.
102static __inline__ long long __RELAXED_FN_ATTRS
103_mm_tzcnt_64(unsigned long long __X)
104{
105  return __builtin_ia32_tzcnt_u64(__X);
106}
107
/* Single-underscore alias that forwards its argument to __tzcnt_u64. */
#define _tzcnt_u64(a)     (__tzcnt_u64((a)))
109
110#endif /* __x86_64__ */
111
112#undef __RELAXED_FN_ATTRS
113
114#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
115
/* Define the default attributes for the functions in this file: force
   inlining, suppress debug info, and compile each function for the "bmi"
   target feature. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))

/* Single-underscore aliases; each forwards its arguments unchanged to the
   double-underscore function of the same name. */
#define _andn_u32(a, b)   (__andn_u32((a), (b)))

/* _bextr_u32 != __bextr_u32: no alias is defined for BEXTR because
   _bextr_u32 is a separate three-argument function, while __bextr_u32 takes
   a single packed control operand. */
#define _blsi_u32(a)      (__blsi_u32((a)))

#define _blsmsk_u32(a)    (__blsmsk_u32((a)))

#define _blsr_u32(a)      (__blsr_u32((a)))
127
128/// Performs a bitwise AND of the second operand with the one's
129///    complement of the first operand.
130///
131/// \headerfile <x86intrin.h>
132///
133/// This intrinsic corresponds to the <c> ANDN </c> instruction.
134///
135/// \param __X
136///    An unsigned integer containing one of the operands.
137/// \param __Y
138///    An unsigned integer containing one of the operands.
139/// \returns An unsigned integer containing the bitwise AND of the second
140///    operand with the one's complement of the first operand.
141static __inline__ unsigned int __DEFAULT_FN_ATTRS
142__andn_u32(unsigned int __X, unsigned int __Y)
143{
144  return ~__X & __Y;
145}
146
147/* AMD-specified, double-leading-underscore version of BEXTR */
148/// Extracts the specified bits from the first operand and returns them
149///    in the least significant bits of the result.
150///
151/// \headerfile <x86intrin.h>
152///
153/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
154///
155/// \param __X
156///    An unsigned integer whose bits are to be extracted.
157/// \param __Y
158///    An unsigned integer used to specify which bits are extracted. Bits [7:0]
159///    specify the index of the least significant bit. Bits [15:8] specify the
160///    number of bits to be extracted.
161/// \returns An unsigned integer whose least significant bits contain the
162///    extracted bits.
163/// \see _bextr_u32
164static __inline__ unsigned int __DEFAULT_FN_ATTRS
165__bextr_u32(unsigned int __X, unsigned int __Y)
166{
167  return __builtin_ia32_bextr_u32(__X, __Y);
168}
169
170/* Intel-specified, single-leading-underscore version of BEXTR */
171/// Extracts the specified bits from the first operand and returns them
172///    in the least significant bits of the result.
173///
174/// \headerfile <x86intrin.h>
175///
176/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
177///
178/// \param __X
179///    An unsigned integer whose bits are to be extracted.
180/// \param __Y
181///    An unsigned integer used to specify the index of the least significant
182///    bit for the bits to be extracted. Bits [7:0] specify the index.
183/// \param __Z
184///    An unsigned integer used to specify the number of bits to be extracted.
185///    Bits [7:0] specify the number of bits.
186/// \returns An unsigned integer whose least significant bits contain the
187///    extracted bits.
188/// \see __bextr_u32
189static __inline__ unsigned int __DEFAULT_FN_ATTRS
190_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
191{
192  return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
193}
194
195/// Clears all bits in the source except for the least significant bit
196///    containing a value of 1 and returns the result.
197///
198/// \headerfile <x86intrin.h>
199///
200/// This intrinsic corresponds to the <c> BLSI </c> instruction.
201///
202/// \param __X
203///    An unsigned integer whose bits are to be cleared.
204/// \returns An unsigned integer containing the result of clearing the bits from
205///    the source operand.
206static __inline__ unsigned int __DEFAULT_FN_ATTRS
207__blsi_u32(unsigned int __X)
208{
209  return __X & -__X;
210}
211
212/// Creates a mask whose bits are set to 1, using bit 0 up to and
213///    including the least significant bit that is set to 1 in the source
214///    operand and returns the result.
215///
216/// \headerfile <x86intrin.h>
217///
218/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
219///
220/// \param __X
221///    An unsigned integer used to create the mask.
222/// \returns An unsigned integer containing the newly created mask.
223static __inline__ unsigned int __DEFAULT_FN_ATTRS
224__blsmsk_u32(unsigned int __X)
225{
226  return __X ^ (__X - 1);
227}
228
229/// Clears the least significant bit that is set to 1 in the source
230///    operand and returns the result.
231///
232/// \headerfile <x86intrin.h>
233///
234/// This intrinsic corresponds to the <c> BLSR </c> instruction.
235///
236/// \param __X
237///    An unsigned integer containing the operand to be cleared.
238/// \returns An unsigned integer containing the result of clearing the source
239///    operand.
240static __inline__ unsigned int __DEFAULT_FN_ATTRS
241__blsr_u32(unsigned int __X)
242{
243  return __X & (__X - 1);
244}
245
246#ifdef __x86_64__
247
/* Single-underscore aliases; each forwards its arguments unchanged to the
   double-underscore function of the same name. */
#define _andn_u64(a, b)   (__andn_u64((a), (b)))

/* _bextr_u64 != __bextr_u64: no alias is defined for BEXTR because
   _bextr_u64 is a separate three-argument function, while __bextr_u64 takes
   a single packed control operand. */
#define _blsi_u64(a)      (__blsi_u64((a)))

#define _blsmsk_u64(a)    (__blsmsk_u64((a)))

#define _blsr_u64(a)      (__blsr_u64((a)))
256
257/// Performs a bitwise AND of the second operand with the one's
258///    complement of the first operand.
259///
260/// \headerfile <x86intrin.h>
261///
262/// This intrinsic corresponds to the <c> ANDN </c> instruction.
263///
264/// \param __X
265///    An unsigned 64-bit integer containing one of the operands.
266/// \param __Y
267///    An unsigned 64-bit integer containing one of the operands.
268/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
269///    operand with the one's complement of the first operand.
270static __inline__ unsigned long long __DEFAULT_FN_ATTRS
271__andn_u64 (unsigned long long __X, unsigned long long __Y)
272{
273  return ~__X & __Y;
274}
275
276/* AMD-specified, double-leading-underscore version of BEXTR */
277/// Extracts the specified bits from the first operand and returns them
278///    in the least significant bits of the result.
279///
280/// \headerfile <x86intrin.h>
281///
282/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
283///
284/// \param __X
285///    An unsigned 64-bit integer whose bits are to be extracted.
286/// \param __Y
287///    An unsigned 64-bit integer used to specify which bits are extracted. Bits
288///    [7:0] specify the index of the least significant bit. Bits [15:8] specify
289///    the number of bits to be extracted.
290/// \returns An unsigned 64-bit integer whose least significant bits contain the
291///    extracted bits.
292/// \see _bextr_u64
293static __inline__ unsigned long long __DEFAULT_FN_ATTRS
294__bextr_u64(unsigned long long __X, unsigned long long __Y)
295{
296  return __builtin_ia32_bextr_u64(__X, __Y);
297}
298
299/* Intel-specified, single-leading-underscore version of BEXTR */
300/// Extracts the specified bits from the first operand and returns them
301///     in the least significant bits of the result.
302///
303/// \headerfile <x86intrin.h>
304///
305/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
306///
307/// \param __X
308///    An unsigned 64-bit integer whose bits are to be extracted.
309/// \param __Y
310///    An unsigned integer used to specify the index of the least significant
311///    bit for the bits to be extracted. Bits [7:0] specify the index.
312/// \param __Z
313///    An unsigned integer used to specify the number of bits to be extracted.
314///    Bits [7:0] specify the number of bits.
315/// \returns An unsigned 64-bit integer whose least significant bits contain the
316///    extracted bits.
317/// \see __bextr_u64
318static __inline__ unsigned long long __DEFAULT_FN_ATTRS
319_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
320{
321  return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
322}
323
324/// Clears all bits in the source except for the least significant bit
325///    containing a value of 1 and returns the result.
326///
327/// \headerfile <x86intrin.h>
328///
329/// This intrinsic corresponds to the <c> BLSI </c> instruction.
330///
331/// \param __X
332///    An unsigned 64-bit integer whose bits are to be cleared.
333/// \returns An unsigned 64-bit integer containing the result of clearing the
334///    bits from the source operand.
335static __inline__ unsigned long long __DEFAULT_FN_ATTRS
336__blsi_u64(unsigned long long __X)
337{
338  return __X & -__X;
339}
340
341/// Creates a mask whose bits are set to 1, using bit 0 up to and
342///    including the least significant bit that is set to 1 in the source
343///    operand and returns the result.
344///
345/// \headerfile <x86intrin.h>
346///
347/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
348///
349/// \param __X
350///    An unsigned 64-bit integer used to create the mask.
351/// \returns An unsigned 64-bit integer containing the newly created mask.
352static __inline__ unsigned long long __DEFAULT_FN_ATTRS
353__blsmsk_u64(unsigned long long __X)
354{
355  return __X ^ (__X - 1);
356}
357
358/// Clears the least significant bit that is set to 1 in the source
359///    operand and returns the result.
360///
361/// \headerfile <x86intrin.h>
362///
363/// This intrinsic corresponds to the <c> BLSR </c> instruction.
364///
365/// \param __X
366///    An unsigned 64-bit integer containing the operand to be cleared.
367/// \returns An unsigned 64-bit integer containing the result of clearing the
368///    source operand.
369static __inline__ unsigned long long __DEFAULT_FN_ATTRS
370__blsr_u64(unsigned long long __X)
371{
372  return __X & (__X - 1);
373}
374
375#endif /* __x86_64__ */
376
377#undef __DEFAULT_FN_ATTRS
378
379#endif /* !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) */
380
381#endif /* __BMIINTRIN_H */
382