ia32intrin.h revision 360660
1/* ===-------- ia32intrin.h ---------------------------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __X86INTRIN_H
11#error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead."
12#endif
13
14#ifndef __IA32INTRIN_H
15#define __IA32INTRIN_H
16
/** Returns the position of the least significant set bit of a 32-bit value.
 *  The result is undefined when the input is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSF </c> instruction or the
 *  <c> TZCNT </c> instruction.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the bit number, i.e. the count of
 *     zero bits below the lowest set bit.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsfd(int __A) {
  /* Spell out the signed-to-unsigned conversion the builtin performs. */
  const unsigned int __bits = (unsigned int)__A;
  return __builtin_ctz(__bits);
}
33
/** Returns the position of the most significant set bit of a 32-bit value.
 *  The result is undefined when the input is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSR </c> instruction or the
 *  <c> LZCNT </c> instruction and an <c> XOR </c>.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the bit number, computed as 31 minus
 *     the number of leading zero bits.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsrd(int __A) {
  const unsigned int __bits = (unsigned int)__A;
  const int __leading_zeros = __builtin_clz(__bits);
  /* Convert "zeros above the top set bit" into that bit's index. */
  return 31 - __leading_zeros;
}
50
/** Reverses the byte order of a 32-bit value, converting between little
 *  endian and big endian representations.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the swapped bytes.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bswapd(int __A) {
  /* The builtin works on an unsigned 32-bit value; make both conversions
   * visible instead of leaving them implicit. */
  const unsigned int __swapped = __builtin_bswap32((unsigned int)__A);
  return (int)__swapped;
}
66
/** Reverses the byte order of a 32-bit value. Performs the same operation as
 *  __bswapd under the <c> _bswap </c> name.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the swapped bytes.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bswap(int __A) {
  const unsigned int __swapped = __builtin_bswap32((unsigned int)__A);
  return (int)__swapped;
}
71
/* Alternate names for the 32-bit bit-scan helpers __bsfd and __bsrd. */
#define _bit_scan_forward(val) __bsfd((val))
#define _bit_scan_reverse(val) __bsrd((val))
74
75#ifdef __x86_64__
/** Returns the position of the least significant set bit of a 64-bit value.
 *  The result is undefined when the input is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSF </c> instruction or the
 *  <c> TZCNT </c> instruction.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 32-bit integer containing the bit number, i.e. the count of
 *     zero bits below the lowest set bit.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsfq(long long __A) {
  /* Spell out the signed-to-unsigned conversion the builtin performs. */
  const unsigned long long __bits = (unsigned long long)__A;
  return __builtin_ctzll(__bits);
}
92
/** Returns the position of the most significant set bit of a 64-bit value.
 *  The result is undefined when the input is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSR </c> instruction or the
 *  <c> LZCNT </c> instruction and an <c> XOR </c>.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 32-bit integer containing the bit number, computed as 63 minus
 *     the number of leading zero bits.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsrq(long long __A) {
  const unsigned long long __bits = (unsigned long long)__A;
  const int __leading_zeros = __builtin_clzll(__bits);
  /* Convert "zeros above the top set bit" into that bit's index. */
  return 63 - __leading_zeros;
}
109
/** Reverses the byte order of a 64-bit value, converting between little
 *  endian and big endian representations.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 64-bit integer containing the swapped bytes.
 */
static __inline__ long long __attribute__((__always_inline__, __nodebug__))
__bswapq(long long __A) {
  /* The builtin works on an unsigned 64-bit value; make both conversions
   * visible instead of leaving them implicit. */
  const unsigned long long __swapped =
      __builtin_bswap64((unsigned long long)__A);
  return (long long)__swapped;
}
125
/* Alternate name for the 64-bit byte swap helper __bswapq. */
#define _bswap64(val) __bswapq((val))
127#endif
128
/** Counts the bits of the 32-bit source operand that are set to 1.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> POPCNT </c> instruction or a
 *  sequence of arithmetic and logic ops to calculate it.
 *
 *  \param __A
 *     An unsigned 32-bit integer operand.
 *  \returns A 32-bit integer containing the number of bits with value 1 in the
 *     source operand.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__popcntd(unsigned int __A)
{
  const int __set_bits = __builtin_popcount(__A);
  return __set_bits;
}
146
/* Alternate name for the 32-bit population count helper __popcntd. */
#define _popcnt32(val) __popcntd((val))
148
149#ifdef __x86_64__
/** Counts the bits of the 64-bit source operand that are set to 1.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> POPCNT </c> instruction or a
 *  sequence of arithmetic and logic ops to calculate it.
 *
 *  \param __A
 *     An unsigned 64-bit integer operand.
 *  \returns A 64-bit integer containing the number of bits with value 1 in the
 *     source operand.
 */
static __inline__ long long __attribute__((__always_inline__, __nodebug__))
__popcntq(unsigned long long __A)
{
  /* The builtin yields an int; widen it explicitly to the return type. */
  const int __set_bits = __builtin_popcountll(__A);
  return (long long)__set_bits;
}
167
/* Alternate name for the 64-bit population count helper __popcntq. */
#define _popcnt64(val) __popcntq((val))
169#endif /* __x86_64__ */
170
#ifdef __x86_64__
/** Reads the processor flags register (64-bit variant).
 *
 *  \headerfile <x86intrin.h>
 *
 *  \returns The 64-bit value produced by the readeflags builtin.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__readeflags(void)
{
  const unsigned long long __flags = __builtin_ia32_readeflags_u64();
  return __flags;
}

/** Writes the processor flags register (64-bit variant).
 *
 *  \headerfile <x86intrin.h>
 *
 *  \param __f
 *     The 64-bit value handed to the writeeflags builtin.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__writeeflags(unsigned long long __f)
{
  __builtin_ia32_writeeflags_u64(__f);
}

#else /* !__x86_64__ */
/** Reads the processor flags register (32-bit variant).
 *
 *  \headerfile <x86intrin.h>
 *
 *  \returns The 32-bit value produced by the readeflags builtin.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__readeflags(void)
{
  const unsigned int __flags = __builtin_ia32_readeflags_u32();
  return __flags;
}

/** Writes the processor flags register (32-bit variant).
 *
 *  \headerfile <x86intrin.h>
 *
 *  \param __f
 *     The 32-bit value handed to the writeeflags builtin.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__writeeflags(unsigned int __f)
{
  __builtin_ia32_writeeflags_u32(__f);
}
#endif /* !__x86_64__ */
197
/** Accumulates an unsigned 8-bit operand into a running CRC-32C checksum.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32B </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand holding the checksum accumulated so far, to
 *     which the CRC-32C checksum of operand \a __D is added.
 *  \param __D
 *     An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32b(unsigned int __C, unsigned char __D)
{
  const unsigned int __updated = __builtin_ia32_crc32qi(__C, __D);
  return __updated;
}
218
/** Accumulates an unsigned 16-bit operand into a running CRC-32C checksum.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32W </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand holding the checksum accumulated so far, to
 *     which the CRC-32C checksum of operand \a __D is added.
 *  \param __D
 *     An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32w(unsigned int __C, unsigned short __D)
{
  const unsigned int __updated = __builtin_ia32_crc32hi(__C, __D);
  return __updated;
}
239
/** Accumulates an unsigned 32-bit operand into a running CRC-32C checksum.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32D </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand holding the checksum accumulated so far, to
 *     which the CRC-32C checksum of operand \a __D is added.
 *  \param __D
 *     An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32d(unsigned int __C, unsigned int __D)
{
  const unsigned int __updated = __builtin_ia32_crc32si(__C, __D);
  return __updated;
}
260
261#ifdef __x86_64__
/** Accumulates an unsigned 64-bit operand into a running CRC-32C checksum.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32Q </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand holding the checksum accumulated so far, to
 *     which the CRC-32C checksum of operand \a __D is added.
 *  \param __D
 *     An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32q(unsigned long long __C, unsigned long long __D)
{
  const unsigned long long __updated = __builtin_ia32_crc32di(__C, __D);
  return __updated;
}
282#endif /* __x86_64__ */
283
/** Reads a performance-monitoring counter through the RDPMC builtin.
 *
 *  \headerfile <x86intrin.h>
 *
 *  \param __A
 *     The value passed to the builtin; presumably selects which counter is
 *     read -- confirm against the processor manual.
 *  \returns The 64-bit value produced by the builtin.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rdpmc(int __A) {
  const unsigned long long __counter = __builtin_ia32_rdpmc(__A);
  return __counter;
}
288
/** Reads the time-stamp counter through the RDTSCP builtin.
 *
 *  \headerfile <x86intrin.h>
 *
 *  \param __A
 *     Pointer handed to the builtin; presumably receives the 32-bit
 *     auxiliary (processor ID) value -- confirm against the processor manual.
 *  \returns The 64-bit value produced by the builtin.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rdtscp(unsigned int *__A) {
  const unsigned long long __ticks = __builtin_ia32_rdtscp(__A);
  return __ticks;
}
294
/* Alternate name for __rdtsc. NOTE(review): __rdtsc is not defined in this
 * header; presumably the compiler provides it -- confirm. */
#define _rdtsc() __rdtsc()

/* Alternate name for __rdpmc. */
#define _rdpmc(ctr) __rdpmc(ctr)
298
/** Invokes the WBINVD builtin. Takes no operands and returns nothing.
 *
 *  \headerfile <x86intrin.h>
 *
 *  NOTE(review): presumably writes back and invalidates the processor caches
 *  and requires privileged execution -- confirm against the processor manual.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_wbinvd(void)
{
  __builtin_ia32_wbinvd();
}
303
304static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
305__rolb(unsigned char __X, int __C) {
306  return __builtin_rotateleft8(__X, __C);
307}
308
309static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
310__rorb(unsigned char __X, int __C) {
311  return __builtin_rotateright8(__X, __C);
312}
313
314static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
315__rolw(unsigned short __X, int __C) {
316  return __builtin_rotateleft16(__X, __C);
317}
318
319static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
320__rorw(unsigned short __X, int __C) {
321  return __builtin_rotateright16(__X, __C);
322}
323
324static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
325__rold(unsigned int __X, int __C) {
326  return __builtin_rotateleft32(__X, __C);
327}
328
329static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
330__rord(unsigned int __X, int __C) {
331  return __builtin_rotateright32(__X, __C);
332}
333
334#ifdef __x86_64__
335static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
336__rolq(unsigned long long __X, int __C) {
337  return __builtin_rotateleft64(__X, __C);
338}
339
340static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
341__rorq(unsigned long long __X, int __C) {
342  return __builtin_rotateright64(__X, __C);
343}
344#endif /* __x86_64__ */
345
#ifndef _MSC_VER
/* MSVC already provides these rotates as builtins, so they are only defined
 * here for other compilers. */
/* _lrotl/_lrotr rotate a long: pick the 64-bit helpers when __LP64__ is
 * defined and the 32-bit helpers otherwise. */
#ifdef __LP64__
#define _lrotl(val,cnt) __rolq((val), (cnt))
#define _lrotr(val,cnt) __rorq((val), (cnt))
#else
#define _lrotl(val,cnt) __rold((val), (cnt))
#define _lrotr(val,cnt) __rord((val), (cnt))
#endif
/* _rotl/_rotr always map to the 32-bit helpers. */
#define _rotl(val,cnt) __rold((val), (cnt))
#define _rotr(val,cnt) __rord((val), (cnt))
#endif /* !_MSC_VER */
358#endif // _MSC_VER
359
/* The 16-bit rotates are not builtins, so they are defined in every mode. */
#define _rotwl(val,cnt) __rolw((val), (cnt))
#define _rotwr(val,cnt) __rorw((val), (cnt))
363
364#endif /* __IA32INTRIN_H */
365