1/* ===-------- ia32intrin.h ---------------------------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __X86INTRIN_H
11#error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead."
12#endif
13
14#ifndef __IA32INTRIN_H
15#define __IA32INTRIN_H
16
/* Define the default attributes for the functions in this file.
 *
 *   __DEFAULT_FN_ATTRS            always-inline, no debug info.
 *   __DEFAULT_FN_ATTRS_SSE42      as above, plus the "sse4.2" target feature.
 *   __DEFAULT_FN_ATTRS_CAST       always-inline; additionally constexpr when
 *                                 compiled as C++11 or later.
 *   __DEFAULT_FN_ATTRS_CONSTEXPR  __DEFAULT_FN_ATTRS; additionally constexpr
 *                                 when compiled as C++11 or later.
 *
 * NOTE(review): __DEFAULT_FN_ATTRS_CAST does not carry __nodebug__, unlike the
 * other three -- confirm whether that is intentional.
 */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#define __DEFAULT_FN_ATTRS_SSE42 __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))

/* constexpr is only a keyword from C++11 onwards; in C and older C++ the
 * macros expand to the plain attribute lists. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CAST __attribute__((__always_inline__)) constexpr
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
#else
#define __DEFAULT_FN_ATTRS_CAST __attribute__((__always_inline__))
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#endif
28
29/** Find the first set bit starting from the lsb. Result is undefined if
30 *  input is 0.
31 *
32 *  \headerfile <x86intrin.h>
33 *
34 *  This intrinsic corresponds to the <c> BSF </c> instruction or the
35 *  <c> TZCNT </c> instruction.
36 *
37 *  \param __A
38 *     A 32-bit integer operand.
39 *  \returns A 32-bit integer containing the bit number.
40 */
41static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
42__bsfd(int __A) {
43  return __builtin_ctz(__A);
44}
45
46/** Find the first set bit starting from the msb. Result is undefined if
47 *  input is 0.
48 *
49 *  \headerfile <x86intrin.h>
50 *
51 *  This intrinsic corresponds to the <c> BSR </c> instruction or the
52 *  <c> LZCNT </c> instruction and an <c> XOR </c>.
53 *
54 *  \param __A
55 *     A 32-bit integer operand.
56 *  \returns A 32-bit integer containing the bit number.
57 */
58static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
59__bsrd(int __A) {
60  return 31 - __builtin_clz(__A);
61}
62
63/** Swaps the bytes in the input. Converting little endian to big endian or
64 *  vice versa.
65 *
66 *  \headerfile <x86intrin.h>
67 *
68 *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
69 *
70 *  \param __A
71 *     A 32-bit integer operand.
72 *  \returns A 32-bit integer containing the swapped bytes.
73 */
74static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
75__bswapd(int __A) {
76  return __builtin_bswap32(__A);
77}
78
79static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
80_bswap(int __A) {
81  return __builtin_bswap32(__A);
82}
83
/* Aliases that forward to __bsfd and __bsrd. The argument is expanded exactly
 * once. */
#define _bit_scan_forward(A) __bsfd((A))
#define _bit_scan_reverse(A) __bsrd((A))
86
87#ifdef __x86_64__
88/** Find the first set bit starting from the lsb. Result is undefined if
89 *  input is 0.
90 *
91 *  \headerfile <x86intrin.h>
92 *
93 *  This intrinsic corresponds to the <c> BSF </c> instruction or the
94 *  <c> TZCNT </c> instruction.
95 *
96 *  \param __A
97 *     A 64-bit integer operand.
98 *  \returns A 32-bit integer containing the bit number.
99 */
100static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
101__bsfq(long long __A) {
102  return __builtin_ctzll(__A);
103}
104
105/** Find the first set bit starting from the msb. Result is undefined if
106 *  input is 0.
107 *
108 *  \headerfile <x86intrin.h>
109 *
110 *  This intrinsic corresponds to the <c> BSR </c> instruction or the
111 *  <c> LZCNT </c> instruction and an <c> XOR </c>.
112 *
113 *  \param __A
114 *     A 64-bit integer operand.
115 *  \returns A 32-bit integer containing the bit number.
116 */
117static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
118__bsrq(long long __A) {
119  return 63 - __builtin_clzll(__A);
120}
121
122/** Swaps the bytes in the input. Converting little endian to big endian or
123 *  vice versa.
124 *
125 *  \headerfile <x86intrin.h>
126 *
127 *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
128 *
129 *  \param __A
130 *     A 64-bit integer operand.
131 *  \returns A 64-bit integer containing the swapped bytes.
132 */
133static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR
134__bswapq(long long __A) {
135  return __builtin_bswap64(__A);
136}
137
138#define _bswap64(A) __bswapq((A))
139#endif
140
141/** Counts the number of bits in the source operand having a value of 1.
142 *
143 *  \headerfile <x86intrin.h>
144 *
145 *  This intrinsic corresponds to the <c> POPCNT </c> instruction or a
146 *  a sequence of arithmetic and logic ops to calculate it.
147 *
148 *  \param __A
149 *     An unsigned 32-bit integer operand.
150 *  \returns A 32-bit integer containing the number of bits with value 1 in the
151 *     source operand.
152 */
153static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
154__popcntd(unsigned int __A)
155{
156  return __builtin_popcount(__A);
157}
158
159#define _popcnt32(A) __popcntd((A))
160
161#ifdef __x86_64__
162/** Counts the number of bits in the source operand having a value of 1.
163 *
164 *  \headerfile <x86intrin.h>
165 *
166 *  This intrinsic corresponds to the <c> POPCNT </c> instruction or a
167 *  a sequence of arithmetic and logic ops to calculate it.
168 *
169 *  \param __A
170 *     An unsigned 64-bit integer operand.
171 *  \returns A 64-bit integer containing the number of bits with value 1 in the
172 *     source operand.
173 */
174static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR
175__popcntq(unsigned long long __A)
176{
177  return __builtin_popcountll(__A);
178}
179
180#define _popcnt64(A) __popcntq((A))
181#endif /* __x86_64__ */
182
/* The flags accessors come in two widths: 64-bit when targeting x86-64,
 * 32-bit otherwise. Exactly one pair is compiled. */
#ifdef __x86_64__
/** Reads the processor flags register.
 *
 *  \headerfile <x86intrin.h>
 *
 *  \returns The 64-bit value produced by the flags-register read builtin.
 */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__readeflags(void)
{
  return __builtin_ia32_readeflags_u64();
}

/** Writes a value to the processor flags register.
 *
 *  \headerfile <x86intrin.h>
 *
 *  \param __f
 *     The 64-bit value handed to the flags-register write builtin.
 */
static __inline__ void __DEFAULT_FN_ATTRS
__writeeflags(unsigned long long __f)
{
  __builtin_ia32_writeeflags_u64(__f);
}

#else /* !__x86_64__ */
/** Reads the processor flags register.
 *
 *  \headerfile <x86intrin.h>
 *
 *  \returns The 32-bit value produced by the flags-register read builtin.
 */
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__readeflags(void)
{
  return __builtin_ia32_readeflags_u32();
}

/** Writes a value to the processor flags register.
 *
 *  \headerfile <x86intrin.h>
 *
 *  \param __f
 *     The 32-bit value handed to the flags-register write builtin.
 */
static __inline__ void __DEFAULT_FN_ATTRS
__writeeflags(unsigned int __f)
{
  __builtin_ia32_writeeflags_u32(__f);
}
#endif /* !__x86_64__ */
209
/** Reinterprets the bits of a 32-bit float value as a 32-bit unsigned
 *  integer. No numeric conversion takes place; the bit pattern is preserved.
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in x86_64,
 *  and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
 *
 *  \param __A
 *     A 32-bit float value.
 *  \returns a 32-bit unsigned integer with the same bit pattern as \a __A.
 */
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CAST
_castf32_u32(float __A) {
  return __builtin_bit_cast(unsigned int, __A);
}
224
/** Reinterprets the bits of a 64-bit float value as a 64-bit unsigned
 *  integer. No numeric conversion takes place; the bit pattern is preserved.
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
 *  and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
 *
 *  \param __A
 *     A 64-bit float value.
 *  \returns a 64-bit unsigned integer with the same bit pattern as \a __A.
 */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CAST
_castf64_u64(double __A) {
  return __builtin_bit_cast(unsigned long long, __A);
}
239
/** Reinterprets the bits of a 32-bit unsigned integer as a 32-bit float
 *  value. No numeric conversion takes place; the bit pattern is preserved.
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in x86_64,
 *  and corresponds to the <c> FLDS </c> instruction in ia32.
 *
 *  \param __A
 *     A 32-bit unsigned integer value.
 *  \returns a 32-bit float value with the same bit pattern as \a __A.
 */
static __inline__ float __DEFAULT_FN_ATTRS_CAST
_castu32_f32(unsigned int __A) {
  return __builtin_bit_cast(float, __A);
}
254
/** Reinterprets the bits of a 64-bit unsigned integer as a 64-bit float
 *  value. No numeric conversion takes place; the bit pattern is preserved.
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
 *  and corresponds to the <c> FLDL </c> instruction in ia32.
 *
 *  \param __A
 *     A 64-bit unsigned integer value.
 *  \returns a 64-bit float value with the same bit pattern as \a __A.
 */
static __inline__ double __DEFAULT_FN_ATTRS_CAST
_castu64_f64(unsigned long long __A) {
  return __builtin_bit_cast(double, __A);
}
269
/** Adds the unsigned integer operand to the CRC-32C checksum of the
 *     unsigned char operand.
 *
 *  The function carries the "sse4.2" target attribute, so it can be used
 *  without enabling SSE4.2 for the whole translation unit.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32B </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a __D.
 *  \param __D
 *     An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42
__crc32b(unsigned int __C, unsigned char __D)
{
  return __builtin_ia32_crc32qi(__C, __D);
}
290
/** Adds the unsigned integer operand to the CRC-32C checksum of the
 *     unsigned short operand.
 *
 *  The function carries the "sse4.2" target attribute, so it can be used
 *  without enabling SSE4.2 for the whole translation unit.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32W </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a __D.
 *  \param __D
 *     An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42
__crc32w(unsigned int __C, unsigned short __D)
{
  return __builtin_ia32_crc32hi(__C, __D);
}
311
/** Adds the unsigned integer operand to the CRC-32C checksum of the
 *     second unsigned integer operand.
 *
 *  The function carries the "sse4.2" target attribute, so it can be used
 *  without enabling SSE4.2 for the whole translation unit.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32D </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a __D.
 *  \param __D
 *     An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42
__crc32d(unsigned int __C, unsigned int __D)
{
  return __builtin_ia32_crc32si(__C, __D);
}
332
333#ifdef __x86_64__
/** Adds the unsigned integer operand to the CRC-32C checksum of the
 *     unsigned 64-bit integer operand.
 *
 *  The function carries the "sse4.2" target attribute, so it can be used
 *  without enabling SSE4.2 for the whole translation unit. It is only
 *  declared when targeting x86-64.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32Q </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a __D.
 *  \param __D
 *     An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_SSE42
__crc32q(unsigned long long __C, unsigned long long __D)
{
  return __builtin_ia32_crc32di(__C, __D);
}
354#endif /* __x86_64__ */
355
/** Reads a performance-monitoring counter.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> RDPMC </c> instruction.
 *
 *  \param __A
 *     Selects the performance counter to read. Which values are valid depends
 *     on the processor; consult its documentation.
 *  \returns The 64-bit value returned by the builtin for the selected counter.
 */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__rdpmc(int __A) {
  return __builtin_ia32_rdpmc(__A);
}
360
/** Reads the processor's time-stamp counter.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> RDTSCP </c> instruction.
 *
 *  \param __A
 *     Pointer handed to the builtin. NOTE(review): per the RDTSCP instruction
 *     definition this location should receive the 32-bit IA32_TSC_AUX value;
 *     the builtin itself is not visible here -- confirm.
 *  \returns The 64-bit value returned by the builtin.
 */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__rdtscp(unsigned int *__A) {
  return __builtin_ia32_rdtscp(__A);
}
366
/* Alias for __rdtsc. __rdtsc is not defined in this header; presumably it is
 * supplied by the compiler -- confirm. */
#define _rdtsc() __rdtsc()

/* Alias that forwards to __rdpmc. */
#define _rdpmc(A) __rdpmc(A)
370
/** Issues a write-back-and-invalidate of the processor caches.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> WBINVD </c> instruction.
 */
static __inline__ void __DEFAULT_FN_ATTRS
_wbinvd(void) {
  __builtin_ia32_wbinvd();
}
375
/** Rotates an 8-bit value to the left by the specified number of bits.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> ROL </c> instruction.
 *
 *  \param __X
 *     The unsigned 8-bit value to be rotated.
 *  \param __C
 *     The number of bits to rotate the value.
 *  \returns The rotated value.
 */
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
__rolb(unsigned char __X, int __C) {
  return __builtin_rotateleft8(__X, __C);
}
380
/** Rotates an 8-bit value to the right by the specified number of bits.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> ROR </c> instruction.
 *
 *  \param __X
 *     The unsigned 8-bit value to be rotated.
 *  \param __C
 *     The number of bits to rotate the value.
 *  \returns The rotated value.
 */
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
__rorb(unsigned char __X, int __C) {
  return __builtin_rotateright8(__X, __C);
}
385
/** Rotates a 16-bit value to the left by the specified number of bits.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> ROL </c> instruction.
 *
 *  \param __X
 *     The unsigned 16-bit value to be rotated.
 *  \param __C
 *     The number of bits to rotate the value.
 *  \returns The rotated value.
 */
static __inline__ unsigned short __DEFAULT_FN_ATTRS_CONSTEXPR
__rolw(unsigned short __X, int __C) {
  return __builtin_rotateleft16(__X, __C);
}
390
/** Rotates a 16-bit value to the right by the specified number of bits.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> ROR </c> instruction.
 *
 *  \param __X
 *     The unsigned 16-bit value to be rotated.
 *  \param __C
 *     The number of bits to rotate the value.
 *  \returns The rotated value.
 */
static __inline__ unsigned short __DEFAULT_FN_ATTRS_CONSTEXPR
__rorw(unsigned short __X, int __C) {
  return __builtin_rotateright16(__X, __C);
}
395
/** Rotates a 32-bit value to the left by the specified number of bits.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> ROL </c> instruction.
 *
 *  \param __X
 *     The unsigned 32-bit value to be rotated.
 *  \param __C
 *     The number of bits to rotate the value.
 *  \returns The rotated value.
 */
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
__rold(unsigned int __X, int __C) {
  return __builtin_rotateleft32(__X, __C);
}
400
/** Rotates a 32-bit value to the right by the specified number of bits.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> ROR </c> instruction.
 *
 *  \param __X
 *     The unsigned 32-bit value to be rotated.
 *  \param __C
 *     The number of bits to rotate the value.
 *  \returns The rotated value.
 */
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
__rord(unsigned int __X, int __C) {
  return __builtin_rotateright32(__X, __C);
}
405
406#ifdef __x86_64__
/** Rotates a 64-bit value to the left by the specified number of bits.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> ROL </c> instruction.
 *
 *  \param __X
 *     The unsigned 64-bit value to be rotated.
 *  \param __C
 *     The number of bits to rotate the value.
 *  \returns The rotated value.
 */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
__rolq(unsigned long long __X, int __C) {
  return __builtin_rotateleft64(__X, __C);
}
411
/** Rotates a 64-bit value to the right by the specified number of bits.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> ROR </c> instruction.
 *
 *  \param __X
 *     The unsigned 64-bit value to be rotated.
 *  \param __C
 *     The number of bits to rotate the value.
 *  \returns The rotated value.
 */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
__rorq(unsigned long long __X, int __C) {
  return __builtin_rotateright64(__X, __C);
}
416#endif /* __x86_64__ */
417
#ifndef _MSC_VER
/* These are already provided as builtins for MSVC. */
/* Select the correct function based on the size of long: the 64-bit rotates
 * when __LP64__ is defined, the 32-bit rotates otherwise. */
#ifdef __LP64__
#define _lrotl(a,b) __rolq((a), (b))
#define _lrotr(a,b) __rorq((a), (b))
#else
#define _lrotl(a,b) __rold((a), (b))
#define _lrotr(a,b) __rord((a), (b))
#endif
/* _rotl/_rotr always map to the 32-bit rotates. */
#define _rotl(a,b) __rold((a), (b))
#define _rotr(a,b) __rord((a), (b))
#endif /* !_MSC_VER */
431
/* These are not builtins so need to be provided in all modes. They forward to
 * the 16-bit rotates __rolw and __rorw. */
#define _rotwl(a,b) __rolw((a), (b))
#define _rotwr(a,b) __rorw((a), (b))
435
/* Remove the helper attribute macros defined at the top of this header so
 * they are no longer defined after it has been included. */
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_CAST
#undef __DEFAULT_FN_ATTRS_SSE42
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
440
441#endif /* __IA32INTRIN_H */
442