immintrin.h revision 314564
1/*===---- immintrin.h - Intel intrinsics -----------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24#ifndef __IMMINTRIN_H
25#define __IMMINTRIN_H
26
27#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
28#include <mmintrin.h>
29#endif
30
31#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
32#include <xmmintrin.h>
33#endif
34
35#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
36#include <emmintrin.h>
37#endif
38
39#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
40#include <pmmintrin.h>
41#endif
42
43#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
44#include <tmmintrin.h>
45#endif
46
47#if !defined(_MSC_VER) || __has_feature(modules) || \
48    (defined(__SSE4_2__) || defined(__SSE4_1__))
49#include <smmintrin.h>
50#endif
51
52#if !defined(_MSC_VER) || __has_feature(modules) || \
53    (defined(__AES__) || defined(__PCLMUL__))
54#include <wmmintrin.h>
55#endif
56
57#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
58#include <clflushoptintrin.h>
59#endif
60
61#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
62#include <avxintrin.h>
63#endif
64
65#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
66#include <avx2intrin.h>
67
68/* The 256-bit versions of functions in f16cintrin.h.
69   Intel documents these as being in immintrin.h, and
70   they depend on typedefs from avxintrin.h. */
71
/// \brief Converts a 256-bit vector of [8 x float] into a 128-bit vector
///    containing 16-bit half-precision float values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// Implemented as a macro rather than a function so that \a imm reaches the
/// builtin as written. The expansion is a single parenthesized expression
/// (no GNU statement expression), so it can be used anywhere an ordinary
/// expression is accepted.
///
/// \param a
///    A 256-bit vector containing 32-bit single-precision float values to be
///    converted to 16-bit half-precision float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing the converted 16-bit half-precision
///    float values.
#define _mm256_cvtps_ph(a, imm) \
  ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)))
97
98/// \brief Converts a 128-bit vector containing 16-bit half-precision float
99///    values into a 256-bit vector of [8 x float].
100///
101/// \headerfile <x86intrin.h>
102///
103/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
104///
105/// \param __a
106///    A 128-bit vector containing 16-bit half-precision float values to be
107///    converted to 32-bit single-precision float values.
108/// \returns A vector of [8 x float] containing the converted 32-bit
109///    single-precision float values.
110static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
111_mm256_cvtph_ps(__m128i __a)
112{
113  return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
114}
115#endif /* __AVX2__ */
116
117#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
118#include <bmiintrin.h>
119#endif
120
121#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
122#include <bmi2intrin.h>
123#endif
124
125#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
126#include <lzcntintrin.h>
127#endif
128
129#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
130#include <fmaintrin.h>
131#endif
132
133#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
134#include <avx512fintrin.h>
135#endif
136
137#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
138#include <avx512vlintrin.h>
139#endif
140
141#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
142#include <avx512bwintrin.h>
143#endif
144
145#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
146#include <avx512cdintrin.h>
147#endif
148
149#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
150#include <avx512dqintrin.h>
151#endif
152
153#if !defined(_MSC_VER) || __has_feature(modules) || \
154    (defined(__AVX512VL__) && defined(__AVX512BW__))
155#include <avx512vlbwintrin.h>
156#endif
157
158#if !defined(_MSC_VER) || __has_feature(modules) || \
159    (defined(__AVX512VL__) && defined(__AVX512CD__))
160#include <avx512vlcdintrin.h>
161#endif
162
163#if !defined(_MSC_VER) || __has_feature(modules) || \
164    (defined(__AVX512VL__) && defined(__AVX512DQ__))
165#include <avx512vldqintrin.h>
166#endif
167
168#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
169#include <avx512erintrin.h>
170#endif
171
172#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
173#include <avx512ifmaintrin.h>
174#endif
175
176#if !defined(_MSC_VER) || __has_feature(modules) || \
177    (defined(__AVX512IFMA__) && defined(__AVX512VL__))
178#include <avx512ifmavlintrin.h>
179#endif
180
181#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
182#include <avx512vbmiintrin.h>
183#endif
184
185#if !defined(_MSC_VER) || __has_feature(modules) || \
186    (defined(__AVX512VBMI__) && defined(__AVX512VL__))
187#include <avx512vbmivlintrin.h>
188#endif
189
190#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
191#include <avx512pfintrin.h>
192#endif
193
194#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
195#include <pkuintrin.h>
196#endif
197
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
/// \brief Requests a 16-bit random value through
///    __builtin_ia32_rdrand16_step.
///
/// \param __p
///    Destination passed to the builtin for the generated value.
/// \returns The builtin's status result. Intel documents the RDRAND step
///    intrinsics as returning 1 when a random value was stored and 0 when
///    none was available.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  return __builtin_ia32_rdrand16_step(__p);
}

/// \brief Requests a 32-bit random value through
///    __builtin_ia32_rdrand32_step.
///
/// \param __p
///    Destination passed to the builtin for the generated value.
/// \returns The builtin's status result (see _rdrand16_step).
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  return __builtin_ia32_rdrand32_step(__p);
}

#ifdef __x86_64__
/// \brief Requests a 64-bit random value through
///    __builtin_ia32_rdrand64_step. Only defined when targeting x86-64.
///
/// \param __p
///    Destination passed to the builtin for the generated value.
/// \returns The builtin's status result (see _rdrand16_step).
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
  return __builtin_ia32_rdrand64_step(__p);
}
#endif
#endif /* __RDRND__ */

/* The bit-scan helpers below carry no "rdrnd" target requirement, so they are
 * defined outside the RDRND guard and stay available regardless of whether
 * RDRND support is enabled. */

/// \brief Finds the position of the least significant set bit.
///
/// \param __A
///    The value to scan.
/// \returns The zero-based index of the lowest set bit of \a __A. The result
///    is undefined when \a __A is 0, because __builtin_ctz is undefined for a
///    zero argument.
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bit_scan_forward(int __A) {
  return __builtin_ctz(__A);
}

/// \brief Finds the position of the most significant set bit.
///
/// \param __A
///    The value to scan.
/// \returns The zero-based index of the highest set bit of \a __A, computed
///    as 31 minus the number of leading zero bits. The result is undefined
///    when \a __A is 0, because __builtin_clz is undefined for a zero
///    argument.
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bit_scan_reverse(int __A) {
  return 31 - __builtin_clz(__A);
}
231
232#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
233#ifdef __x86_64__
/// \brief Reads the FS segment base through __builtin_ia32_rdfsbase32.
///
/// \returns The value produced by the builtin, as an unsigned 32-bit integer.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  unsigned int __base = __builtin_ia32_rdfsbase32();
  return __base;
}
239
/// \brief Reads the FS segment base through __builtin_ia32_rdfsbase64.
///
/// \returns The value produced by the builtin, as an unsigned 64-bit integer.
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  unsigned long long __base = __builtin_ia32_rdfsbase64();
  return __base;
}
245
/// \brief Reads the GS segment base through __builtin_ia32_rdgsbase32.
///
/// \returns The value produced by the builtin, as an unsigned 32-bit integer.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  unsigned int __base = __builtin_ia32_rdgsbase32();
  return __base;
}
251
/// \brief Reads the GS segment base through __builtin_ia32_rdgsbase64.
///
/// \returns The value produced by the builtin, as an unsigned 64-bit integer.
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  unsigned long long __base = __builtin_ia32_rdgsbase64();
  return __base;
}
257
/// \brief Writes a 32-bit value to the FS segment base through
///    __builtin_ia32_wrfsbase32.
///
/// \param __V
///    The value handed to the builtin.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  /* Plain call: a void function must not `return` an expression (C11 6.8.6.4). */
  __builtin_ia32_wrfsbase32(__V);
}
263
/// \brief Writes a 64-bit value to the FS segment base through
///    __builtin_ia32_wrfsbase64.
///
/// \param __V
///    The value handed to the builtin.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  /* Plain call: a void function must not `return` an expression (C11 6.8.6.4). */
  __builtin_ia32_wrfsbase64(__V);
}
269
/// \brief Writes a 32-bit value to the GS segment base through
///    __builtin_ia32_wrgsbase32.
///
/// \param __V
///    The value handed to the builtin.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  /* Plain call: a void function must not `return` an expression (C11 6.8.6.4). */
  __builtin_ia32_wrgsbase32(__V);
}
275
/// \brief Writes a 64-bit value to the GS segment base through
///    __builtin_ia32_wrgsbase64.
///
/// \param __V
///    The value handed to the builtin.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  /* Plain call: a void function must not `return` an expression (C11 6.8.6.4). */
  __builtin_ia32_wrgsbase64(__V);
}
281
282#endif
283#endif /* __FSGSBASE__ */
284
285#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
286#include <rtmintrin.h>
287#include <xtestintrin.h>
288#endif
289
290#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
291#include <shaintrin.h>
292#endif
293
294#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
295#include <fxsrintrin.h>
296#endif
297
298#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)
299#include <xsaveintrin.h>
300#endif
301
302#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
303#include <xsaveoptintrin.h>
304#endif
305
306#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
307#include <xsavecintrin.h>
308#endif
309
310#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
311#include <xsavesintrin.h>
312#endif
313
/* Some intrinsics inside adxintrin.h are available only on processors with
 * ADX, whereas others are available at all times, so the header is included
 * unconditionally. */
316#include <adxintrin.h>
317
318#endif /* __IMMINTRIN_H */
319