bmiintrin.h revision 321369
1/*===---- bmiintrin.h - BMI intrinsics -------------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
25#error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
26#endif
27
28#ifndef __BMIINTRIN_H
29#define __BMIINTRIN_H
30
/* Intel-style single-underscore spellings of the 32-bit (and 16-bit TZCNT)
   intrinsics. Each macro expands to a call to the double-underscore function
   of the same name defined later in this header. */
#define _tzcnt_u16(x)     (__tzcnt_u16((x)))

#define _andn_u32(x, y)   (__andn_u32((x), (y)))

/* There is deliberately no _bextr_u32 alias: _bextr_u32 takes three arguments
   while __bextr_u32 takes two, so it is defined as its own function. */
#define _blsi_u32(x)      (__blsi_u32((x)))

#define _blsmsk_u32(x)    (__blsmsk_u32((x)))

#define _blsr_u32(x)      (__blsr_u32((x)))

#define _tzcnt_u32(x)     (__tzcnt_u32((x)))
43
/* Attribute set used by the intrinsics in this file that require BMI: always
   inlined, no debug info, and compiled with the "bmi" target feature. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("bmi")))

/* Attribute set without the target requirement, used by the tzcnt intrinsics
   so they remain usable on non-BMI targets. The TZCNT instruction behaves as
   BSF on such targets, and existing code relies on it as a potentially faster
   version of BSF. */
#define __RELAXED_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__))
51
52/// \brief Counts the number of trailing zero bits in the operand.
53///
54/// \headerfile <x86intrin.h>
55///
56/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
57///
58/// \param __X
59///    An unsigned 16-bit integer whose trailing zeros are to be counted.
60/// \returns An unsigned 16-bit integer containing the number of trailing zero
61///    bits in the operand.
62static __inline__ unsigned short __RELAXED_FN_ATTRS
63__tzcnt_u16(unsigned short __X)
64{
65  return __X ? __builtin_ctzs(__X) : 16;
66}
67
68/// \brief Performs a bitwise AND of the second operand with the one's
69///    complement of the first operand.
70///
71/// \headerfile <x86intrin.h>
72///
73/// This intrinsic corresponds to the <c> ANDN </c> instruction.
74///
75/// \param __X
76///    An unsigned integer containing one of the operands.
77/// \param __Y
78///    An unsigned integer containing one of the operands.
79/// \returns An unsigned integer containing the bitwise AND of the second
80///    operand with the one's complement of the first operand.
81static __inline__ unsigned int __DEFAULT_FN_ATTRS
82__andn_u32(unsigned int __X, unsigned int __Y)
83{
84  return ~__X & __Y;
85}
86
87/* AMD-specified, double-leading-underscore version of BEXTR */
88/// \brief Extracts the specified bits from the first operand and returns them
89///    in the least significant bits of the result.
90///
91/// \headerfile <x86intrin.h>
92///
93/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
94///
95/// \param __X
96///    An unsigned integer whose bits are to be extracted.
97/// \param __Y
98///    An unsigned integer used to specify which bits are extracted. Bits [7:0]
99///    specify the index of the least significant bit. Bits [15:8] specify the
100///    number of bits to be extracted.
101/// \returns An unsigned integer whose least significant bits contain the
102///    extracted bits.
103static __inline__ unsigned int __DEFAULT_FN_ATTRS
104__bextr_u32(unsigned int __X, unsigned int __Y)
105{
106  return __builtin_ia32_bextr_u32(__X, __Y);
107}
108
109/* Intel-specified, single-leading-underscore version of BEXTR */
110/// \brief Extracts the specified bits from the first operand and returns them
111///    in the least significant bits of the result.
112///
113/// \headerfile <x86intrin.h>
114///
115/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
116///
117/// \param __X
118///    An unsigned integer whose bits are to be extracted.
119/// \param __Y
120///    An unsigned integer used to specify the index of the least significant
121///    bit for the bits to be extracted. Bits [7:0] specify the index.
122/// \param __Z
123///    An unsigned integer used to specify the number of bits to be extracted.
124///    Bits [7:0] specify the number of bits.
125/// \returns An unsigned integer whose least significant bits contain the
126///    extracted bits.
127static __inline__ unsigned int __DEFAULT_FN_ATTRS
128_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
129{
130  return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
131}
132
133/// \brief Clears all bits in the source except for the least significant bit
134///    containing a value of 1 and returns the result.
135///
136/// \headerfile <x86intrin.h>
137///
138/// This intrinsic corresponds to the <c> BLSI </c> instruction.
139///
140/// \param __X
141///    An unsigned integer whose bits are to be cleared.
142/// \returns An unsigned integer containing the result of clearing the bits from
143///    the source operand.
144static __inline__ unsigned int __DEFAULT_FN_ATTRS
145__blsi_u32(unsigned int __X)
146{
147  return __X & -__X;
148}
149
150/// \brief Creates a mask whose bits are set to 1, using bit 0 up to and
151///    including the least significant bit that is set to 1 in the source
152///    operand and returns the result.
153///
154/// \headerfile <x86intrin.h>
155///
156/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
157///
158/// \param __X
159///    An unsigned integer used to create the mask.
160/// \returns An unsigned integer containing the newly created mask.
161static __inline__ unsigned int __DEFAULT_FN_ATTRS
162__blsmsk_u32(unsigned int __X)
163{
164  return __X ^ (__X - 1);
165}
166
167/// \brief Clears the least significant bit that is set to 1 in the source
168///    operand and returns the result.
169///
170/// \headerfile <x86intrin.h>
171///
172/// This intrinsic corresponds to the <c> BLSR </c> instruction.
173///
174/// \param __X
175///    An unsigned integer containing the operand to be cleared.
176/// \returns An unsigned integer containing the result of clearing the source
177///    operand.
178static __inline__ unsigned int __DEFAULT_FN_ATTRS
179__blsr_u32(unsigned int __X)
180{
181  return __X & (__X - 1);
182}
183
184/// \brief Counts the number of trailing zero bits in the operand.
185///
186/// \headerfile <x86intrin.h>
187///
188/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
189///
190/// \param __X
191///    An unsigned 32-bit integer whose trailing zeros are to be counted.
192/// \returns An unsigned 32-bit integer containing the number of trailing zero
193///    bits in the operand.
194static __inline__ unsigned int __RELAXED_FN_ATTRS
195__tzcnt_u32(unsigned int __X)
196{
197  return __X ? __builtin_ctz(__X) : 32;
198}
199
200/// \brief Counts the number of trailing zero bits in the operand.
201///
202/// \headerfile <x86intrin.h>
203///
204/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
205///
206/// \param __X
207///    An unsigned 32-bit integer whose trailing zeros are to be counted.
208/// \returns An 32-bit integer containing the number of trailing zero bits in
209///    the operand.
210static __inline__ int __RELAXED_FN_ATTRS
211_mm_tzcnt_32(unsigned int __X)
212{
213  return __X ? __builtin_ctz(__X) : 32;
214}
215
216#ifdef __x86_64__
217
/* Intel-style single-underscore spellings of the 64-bit intrinsics. Each macro
   expands to a call to the double-underscore function of the same name
   defined later in this header. */
#define _andn_u64(x, y)   (__andn_u64((x), (y)))

/* There is deliberately no _bextr_u64 alias: _bextr_u64 takes three arguments
   while __bextr_u64 takes two, so it is defined as its own function. */
#define _blsi_u64(x)      (__blsi_u64((x)))

#define _blsmsk_u64(x)    (__blsmsk_u64((x)))

#define _blsr_u64(x)      (__blsr_u64((x)))

#define _tzcnt_u64(x)     (__tzcnt_u64((x)))
228
229/// \brief Performs a bitwise AND of the second operand with the one's
230///    complement of the first operand.
231///
232/// \headerfile <x86intrin.h>
233///
234/// This intrinsic corresponds to the <c> ANDN </c> instruction.
235///
236/// \param __X
237///    An unsigned 64-bit integer containing one of the operands.
238/// \param __Y
239///    An unsigned 64-bit integer containing one of the operands.
240/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
241///    operand with the one's complement of the first operand.
242static __inline__ unsigned long long __DEFAULT_FN_ATTRS
243__andn_u64 (unsigned long long __X, unsigned long long __Y)
244{
245  return ~__X & __Y;
246}
247
248/* AMD-specified, double-leading-underscore version of BEXTR */
249/// \brief Extracts the specified bits from the first operand and returns them
250///    in the least significant bits of the result.
251///
252/// \headerfile <x86intrin.h>
253///
254/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
255///
256/// \param __X
257///    An unsigned 64-bit integer whose bits are to be extracted.
258/// \param __Y
259///    An unsigned 64-bit integer used to specify which bits are extracted. Bits
260///    [7:0] specify the index of the least significant bit. Bits [15:8] specify
261///    the number of bits to be extracted.
262/// \returns An unsigned 64-bit integer whose least significant bits contain the
263///    extracted bits.
264static __inline__ unsigned long long __DEFAULT_FN_ATTRS
265__bextr_u64(unsigned long long __X, unsigned long long __Y)
266{
267  return __builtin_ia32_bextr_u64(__X, __Y);
268}
269
270/* Intel-specified, single-leading-underscore version of BEXTR */
271/// \brief Extracts the specified bits from the first operand and returns them
272///     in the least significant bits of the result.
273///
274/// \headerfile <x86intrin.h>
275///
276/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
277///
278/// \param __X
279///    An unsigned 64-bit integer whose bits are to be extracted.
280/// \param __Y
281///    An unsigned integer used to specify the index of the least significant
282///    bit for the bits to be extracted. Bits [7:0] specify the index.
283/// \param __Z
284///    An unsigned integer used to specify the number of bits to be extracted.
285///    Bits [7:0] specify the number of bits.
286/// \returns An unsigned 64-bit integer whose least significant bits contain the
287///    extracted bits.
288static __inline__ unsigned long long __DEFAULT_FN_ATTRS
289_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
290{
291  return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
292}
293
294/// \brief Clears all bits in the source except for the least significant bit
295///    containing a value of 1 and returns the result.
296///
297/// \headerfile <x86intrin.h>
298///
299/// This intrinsic corresponds to the <c> BLSI </c> instruction.
300///
301/// \param __X
302///    An unsigned 64-bit integer whose bits are to be cleared.
303/// \returns An unsigned 64-bit integer containing the result of clearing the
304///    bits from the source operand.
305static __inline__ unsigned long long __DEFAULT_FN_ATTRS
306__blsi_u64(unsigned long long __X)
307{
308  return __X & -__X;
309}
310
311/// \brief Creates a mask whose bits are set to 1, using bit 0 up to and
312///    including the least significant bit that is set to 1 in the source
313///    operand and returns the result.
314///
315/// \headerfile <x86intrin.h>
316///
317/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
318///
319/// \param __X
320///    An unsigned 64-bit integer used to create the mask.
321/// \returns An unsigned 64-bit integer containing the newly created mask.
322static __inline__ unsigned long long __DEFAULT_FN_ATTRS
323__blsmsk_u64(unsigned long long __X)
324{
325  return __X ^ (__X - 1);
326}
327
328/// \brief Clears the least significant bit that is set to 1 in the source
329///    operand and returns the result.
330///
331/// \headerfile <x86intrin.h>
332///
333/// This intrinsic corresponds to the <c> BLSR </c> instruction.
334///
335/// \param __X
336///    An unsigned 64-bit integer containing the operand to be cleared.
337/// \returns An unsigned 64-bit integer containing the result of clearing the
338///    source operand.
339static __inline__ unsigned long long __DEFAULT_FN_ATTRS
340__blsr_u64(unsigned long long __X)
341{
342  return __X & (__X - 1);
343}
344
345/// \brief Counts the number of trailing zero bits in the operand.
346///
347/// \headerfile <x86intrin.h>
348///
349/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
350///
351/// \param __X
352///    An unsigned 64-bit integer whose trailing zeros are to be counted.
353/// \returns An unsigned 64-bit integer containing the number of trailing zero
354///    bits in the operand.
355static __inline__ unsigned long long __RELAXED_FN_ATTRS
356__tzcnt_u64(unsigned long long __X)
357{
358  return __X ? __builtin_ctzll(__X) : 64;
359}
360
361/// \brief Counts the number of trailing zero bits in the operand.
362///
363/// \headerfile <x86intrin.h>
364///
365/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
366///
367/// \param __X
368///    An unsigned 64-bit integer whose trailing zeros are to be counted.
369/// \returns An 64-bit integer containing the number of trailing zero bits in
370///    the operand.
371static __inline__ long long __RELAXED_FN_ATTRS
372_mm_tzcnt_64(unsigned long long __X)
373{
374  return __X ? __builtin_ctzll(__X) : 64;
375}
376
377#endif /* __x86_64__ */
378
379#undef __DEFAULT_FN_ATTRS
380#undef __RELAXED_FN_ATTRS
381
382#endif /* __BMIINTRIN_H */
383