1/*===---- bmiintrin.h - BMI intrinsics -------------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
11#error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
12#endif
13
14#ifndef __BMIINTRIN_H
15#define __BMIINTRIN_H
16
/* The tzcnt intrinsics are usable even when the target does not enable BMI.
   On such targets the TZCNT instruction behaves as BSF, and existing code
   relies on it as a potentially faster BSF. For that reason these attributes
   carry no target requirement. */
#define __RELAXED_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__))
21
/* Single-underscore spelling; forwards its argument to __tzcnt_u16. */
#define _tzcnt_u16(__A) (__tzcnt_u16((__A)))
23
24/// Counts the number of trailing zero bits in the operand.
25///
26/// \headerfile <x86intrin.h>
27///
28/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
29///
30/// \param __X
31///    An unsigned 16-bit integer whose trailing zeros are to be counted.
32/// \returns An unsigned 16-bit integer containing the number of trailing zero
33///    bits in the operand.
34static __inline__ unsigned short __RELAXED_FN_ATTRS
35__tzcnt_u16(unsigned short __X)
36{
37  return __builtin_ia32_tzcnt_u16(__X);
38}
39
40/// Counts the number of trailing zero bits in the operand.
41///
42/// \headerfile <x86intrin.h>
43///
44/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
45///
46/// \param __X
47///    An unsigned 32-bit integer whose trailing zeros are to be counted.
48/// \returns An unsigned 32-bit integer containing the number of trailing zero
49///    bits in the operand.
50static __inline__ unsigned int __RELAXED_FN_ATTRS
51__tzcnt_u32(unsigned int __X)
52{
53  return __builtin_ia32_tzcnt_u32(__X);
54}
55
56/// Counts the number of trailing zero bits in the operand.
57///
58/// \headerfile <x86intrin.h>
59///
60/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
61///
62/// \param __X
63///    An unsigned 32-bit integer whose trailing zeros are to be counted.
64/// \returns An 32-bit integer containing the number of trailing zero bits in
65///    the operand.
66static __inline__ int __RELAXED_FN_ATTRS
67_mm_tzcnt_32(unsigned int __X)
68{
69  return __builtin_ia32_tzcnt_u32(__X);
70}
71
/* Single-underscore spelling; forwards its argument to __tzcnt_u32. */
#define _tzcnt_u32(__A) (__tzcnt_u32((__A)))
73
74#ifdef __x86_64__
75
76/// Counts the number of trailing zero bits in the operand.
77///
78/// \headerfile <x86intrin.h>
79///
80/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
81///
82/// \param __X
83///    An unsigned 64-bit integer whose trailing zeros are to be counted.
84/// \returns An unsigned 64-bit integer containing the number of trailing zero
85///    bits in the operand.
86static __inline__ unsigned long long __RELAXED_FN_ATTRS
87__tzcnt_u64(unsigned long long __X)
88{
89  return __builtin_ia32_tzcnt_u64(__X);
90}
91
92/// Counts the number of trailing zero bits in the operand.
93///
94/// \headerfile <x86intrin.h>
95///
96/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
97///
98/// \param __X
99///    An unsigned 64-bit integer whose trailing zeros are to be counted.
100/// \returns An 64-bit integer containing the number of trailing zero bits in
101///    the operand.
102static __inline__ long long __RELAXED_FN_ATTRS
103_mm_tzcnt_64(unsigned long long __X)
104{
105  return __builtin_ia32_tzcnt_u64(__X);
106}
107
/* Single-underscore spelling; forwards its argument to __tzcnt_u64. */
#define _tzcnt_u64(__A) (__tzcnt_u64((__A)))
109
110#endif /* __x86_64__ */
111
112#undef __RELAXED_FN_ATTRS
113
114#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
115    defined(__BMI__)
116
/* Default attributes for the BMI functions that follow: always inlined, no
   debug info, and compiled with the "bmi" target feature. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
119
/* Single-underscore spellings of the 32-bit operations. Each one forwards its
   arguments unchanged to the double-underscore function of the same name. */
#define _andn_u32(__A, __B) (__andn_u32((__A), (__B)))

/* There is no alias for _bextr_u32: _bextr_u32 != __bextr_u32. */
#define _blsi_u32(__A) (__blsi_u32((__A)))

#define _blsmsk_u32(__A) (__blsmsk_u32((__A)))

#define _blsr_u32(__A) (__blsr_u32((__A)))
128
129/// Performs a bitwise AND of the second operand with the one's
130///    complement of the first operand.
131///
132/// \headerfile <x86intrin.h>
133///
134/// This intrinsic corresponds to the <c> ANDN </c> instruction.
135///
136/// \param __X
137///    An unsigned integer containing one of the operands.
138/// \param __Y
139///    An unsigned integer containing one of the operands.
140/// \returns An unsigned integer containing the bitwise AND of the second
141///    operand with the one's complement of the first operand.
142static __inline__ unsigned int __DEFAULT_FN_ATTRS
143__andn_u32(unsigned int __X, unsigned int __Y)
144{
145  return ~__X & __Y;
146}
147
148/* AMD-specified, double-leading-underscore version of BEXTR */
149/// Extracts the specified bits from the first operand and returns them
150///    in the least significant bits of the result.
151///
152/// \headerfile <x86intrin.h>
153///
154/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
155///
156/// \param __X
157///    An unsigned integer whose bits are to be extracted.
158/// \param __Y
159///    An unsigned integer used to specify which bits are extracted. Bits [7:0]
160///    specify the index of the least significant bit. Bits [15:8] specify the
161///    number of bits to be extracted.
162/// \returns An unsigned integer whose least significant bits contain the
163///    extracted bits.
164/// \see _bextr_u32
165static __inline__ unsigned int __DEFAULT_FN_ATTRS
166__bextr_u32(unsigned int __X, unsigned int __Y)
167{
168  return __builtin_ia32_bextr_u32(__X, __Y);
169}
170
171/* Intel-specified, single-leading-underscore version of BEXTR */
172/// Extracts the specified bits from the first operand and returns them
173///    in the least significant bits of the result.
174///
175/// \headerfile <x86intrin.h>
176///
177/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
178///
179/// \param __X
180///    An unsigned integer whose bits are to be extracted.
181/// \param __Y
182///    An unsigned integer used to specify the index of the least significant
183///    bit for the bits to be extracted. Bits [7:0] specify the index.
184/// \param __Z
185///    An unsigned integer used to specify the number of bits to be extracted.
186///    Bits [7:0] specify the number of bits.
187/// \returns An unsigned integer whose least significant bits contain the
188///    extracted bits.
189/// \see __bextr_u32
190static __inline__ unsigned int __DEFAULT_FN_ATTRS
191_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
192{
193  return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
194}
195
196/* Intel-specified, single-leading-underscore version of BEXTR2 */
197/// Extracts the specified bits from the first operand and returns them
198///    in the least significant bits of the result.
199///
200/// \headerfile <x86intrin.h>
201///
202/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
203///
204/// \param __X
205///    An unsigned integer whose bits are to be extracted.
206/// \param __Y
207///    An unsigned integer used to specify which bits are extracted. Bits [7:0]
208///    specify the index of the least significant bit. Bits [15:8] specify the
209///    number of bits to be extracted.
210/// \returns An unsigned integer whose least significant bits contain the
211///    extracted bits.
212/// \see __bextr_u32
213static __inline__ unsigned int __DEFAULT_FN_ATTRS
214_bextr2_u32(unsigned int __X, unsigned int __Y) {
215  return __builtin_ia32_bextr_u32(__X, __Y);
216}
217
218/// Clears all bits in the source except for the least significant bit
219///    containing a value of 1 and returns the result.
220///
221/// \headerfile <x86intrin.h>
222///
223/// This intrinsic corresponds to the <c> BLSI </c> instruction.
224///
225/// \param __X
226///    An unsigned integer whose bits are to be cleared.
227/// \returns An unsigned integer containing the result of clearing the bits from
228///    the source operand.
229static __inline__ unsigned int __DEFAULT_FN_ATTRS
230__blsi_u32(unsigned int __X)
231{
232  return __X & -__X;
233}
234
235/// Creates a mask whose bits are set to 1, using bit 0 up to and
236///    including the least significant bit that is set to 1 in the source
237///    operand and returns the result.
238///
239/// \headerfile <x86intrin.h>
240///
241/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
242///
243/// \param __X
244///    An unsigned integer used to create the mask.
245/// \returns An unsigned integer containing the newly created mask.
246static __inline__ unsigned int __DEFAULT_FN_ATTRS
247__blsmsk_u32(unsigned int __X)
248{
249  return __X ^ (__X - 1);
250}
251
252/// Clears the least significant bit that is set to 1 in the source
253///    operand and returns the result.
254///
255/// \headerfile <x86intrin.h>
256///
257/// This intrinsic corresponds to the <c> BLSR </c> instruction.
258///
259/// \param __X
260///    An unsigned integer containing the operand to be cleared.
261/// \returns An unsigned integer containing the result of clearing the source
262///    operand.
263static __inline__ unsigned int __DEFAULT_FN_ATTRS
264__blsr_u32(unsigned int __X)
265{
266  return __X & (__X - 1);
267}
268
269#ifdef __x86_64__
270
/* Single-underscore spellings of the 64-bit operations. Each one forwards its
   arguments unchanged to the double-underscore function of the same name. */
#define _andn_u64(__A, __B) (__andn_u64((__A), (__B)))

/* There is no alias for _bextr_u64: _bextr_u64 != __bextr_u64. */
#define _blsi_u64(__A) (__blsi_u64((__A)))

#define _blsmsk_u64(__A) (__blsmsk_u64((__A)))

#define _blsr_u64(__A) (__blsr_u64((__A)))
279
280/// Performs a bitwise AND of the second operand with the one's
281///    complement of the first operand.
282///
283/// \headerfile <x86intrin.h>
284///
285/// This intrinsic corresponds to the <c> ANDN </c> instruction.
286///
287/// \param __X
288///    An unsigned 64-bit integer containing one of the operands.
289/// \param __Y
290///    An unsigned 64-bit integer containing one of the operands.
291/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
292///    operand with the one's complement of the first operand.
293static __inline__ unsigned long long __DEFAULT_FN_ATTRS
294__andn_u64 (unsigned long long __X, unsigned long long __Y)
295{
296  return ~__X & __Y;
297}
298
299/* AMD-specified, double-leading-underscore version of BEXTR */
300/// Extracts the specified bits from the first operand and returns them
301///    in the least significant bits of the result.
302///
303/// \headerfile <x86intrin.h>
304///
305/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
306///
307/// \param __X
308///    An unsigned 64-bit integer whose bits are to be extracted.
309/// \param __Y
310///    An unsigned 64-bit integer used to specify which bits are extracted. Bits
311///    [7:0] specify the index of the least significant bit. Bits [15:8] specify
312///    the number of bits to be extracted.
313/// \returns An unsigned 64-bit integer whose least significant bits contain the
314///    extracted bits.
315/// \see _bextr_u64
316static __inline__ unsigned long long __DEFAULT_FN_ATTRS
317__bextr_u64(unsigned long long __X, unsigned long long __Y)
318{
319  return __builtin_ia32_bextr_u64(__X, __Y);
320}
321
322/* Intel-specified, single-leading-underscore version of BEXTR */
323/// Extracts the specified bits from the first operand and returns them
324///     in the least significant bits of the result.
325///
326/// \headerfile <x86intrin.h>
327///
328/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
329///
330/// \param __X
331///    An unsigned 64-bit integer whose bits are to be extracted.
332/// \param __Y
333///    An unsigned integer used to specify the index of the least significant
334///    bit for the bits to be extracted. Bits [7:0] specify the index.
335/// \param __Z
336///    An unsigned integer used to specify the number of bits to be extracted.
337///    Bits [7:0] specify the number of bits.
338/// \returns An unsigned 64-bit integer whose least significant bits contain the
339///    extracted bits.
340/// \see __bextr_u64
341static __inline__ unsigned long long __DEFAULT_FN_ATTRS
342_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
343{
344  return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
345}
346
347/* Intel-specified, single-leading-underscore version of BEXTR2 */
348/// Extracts the specified bits from the first operand and returns them
349///    in the least significant bits of the result.
350///
351/// \headerfile <x86intrin.h>
352///
353/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
354///
355/// \param __X
356///    An unsigned 64-bit integer whose bits are to be extracted.
357/// \param __Y
358///    An unsigned 64-bit integer used to specify which bits are extracted. Bits
359///    [7:0] specify the index of the least significant bit. Bits [15:8] specify
360///    the number of bits to be extracted.
361/// \returns An unsigned 64-bit integer whose least significant bits contain the
362///    extracted bits.
363/// \see __bextr_u64
364static __inline__ unsigned long long __DEFAULT_FN_ATTRS
365_bextr2_u64(unsigned long long __X, unsigned long long __Y) {
366  return __builtin_ia32_bextr_u64(__X, __Y);
367}
368
369/// Clears all bits in the source except for the least significant bit
370///    containing a value of 1 and returns the result.
371///
372/// \headerfile <x86intrin.h>
373///
374/// This intrinsic corresponds to the <c> BLSI </c> instruction.
375///
376/// \param __X
377///    An unsigned 64-bit integer whose bits are to be cleared.
378/// \returns An unsigned 64-bit integer containing the result of clearing the
379///    bits from the source operand.
380static __inline__ unsigned long long __DEFAULT_FN_ATTRS
381__blsi_u64(unsigned long long __X)
382{
383  return __X & -__X;
384}
385
386/// Creates a mask whose bits are set to 1, using bit 0 up to and
387///    including the least significant bit that is set to 1 in the source
388///    operand and returns the result.
389///
390/// \headerfile <x86intrin.h>
391///
392/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
393///
394/// \param __X
395///    An unsigned 64-bit integer used to create the mask.
396/// \returns An unsigned 64-bit integer containing the newly created mask.
397static __inline__ unsigned long long __DEFAULT_FN_ATTRS
398__blsmsk_u64(unsigned long long __X)
399{
400  return __X ^ (__X - 1);
401}
402
403/// Clears the least significant bit that is set to 1 in the source
404///    operand and returns the result.
405///
406/// \headerfile <x86intrin.h>
407///
408/// This intrinsic corresponds to the <c> BLSR </c> instruction.
409///
410/// \param __X
411///    An unsigned 64-bit integer containing the operand to be cleared.
412/// \returns An unsigned 64-bit integer containing the result of clearing the
413///    source operand.
414static __inline__ unsigned long long __DEFAULT_FN_ATTRS
415__blsr_u64(unsigned long long __X)
416{
417  return __X & (__X - 1);
418}
419
420#endif /* __x86_64__ */
421
422#undef __DEFAULT_FN_ATTRS
423
424#endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules)   \
425          || defined(__BMI__) */
426
427#endif /* __BMIINTRIN_H */
428