immintrin.h revision 314564
1/*===---- immintrin.h - Intel intrinsics -----------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24#ifndef __IMMINTRIN_H 25#define __IMMINTRIN_H 26 27#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__) 28#include <mmintrin.h> 29#endif 30 31#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__) 32#include <xmmintrin.h> 33#endif 34 35#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__) 36#include <emmintrin.h> 37#endif 38 39#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__) 40#include <pmmintrin.h> 41#endif 42 43#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__) 44#include <tmmintrin.h> 45#endif 46 47#if !defined(_MSC_VER) || __has_feature(modules) || \ 48 (defined(__SSE4_2__) || defined(__SSE4_1__)) 49#include <smmintrin.h> 50#endif 51 52#if !defined(_MSC_VER) || __has_feature(modules) || \ 53 (defined(__AES__) || defined(__PCLMUL__)) 54#include <wmmintrin.h> 55#endif 56 57#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__) 58#include <clflushoptintrin.h> 59#endif 60 61#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__) 62#include <avxintrin.h> 63#endif 64 65#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__) 66#include <avx2intrin.h> 67 68/* The 256-bit versions of functions in f16cintrin.h. 69 Intel documents these as being in immintrin.h, and 70 they depend on typedefs from avxintrin.h. */ 71 72/// \brief Converts a 256-bit vector of [8 x float] into a 128-bit vector 73/// containing 16-bit half-precision float values. 74/// 75/// \headerfile <x86intrin.h> 76/// 77/// \code 78/// __m128i _mm256_cvtps_ph(__m256 a, const int imm); 79/// \endcode 80/// 81/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction. 82/// 83/// \param a 84/// A 256-bit vector containing 32-bit single-precision float values to be 85/// converted to 16-bit half-precision float values. 86/// \param imm 87/// An immediate value controlling rounding using bits [2:0]: \n 88/// 000: Nearest \n 89/// 001: Down \n 90/// 010: Up \n 91/// 011: Truncate \n 92/// 1XX: Use MXCSR.RC for rounding 93/// \returns A 128-bit vector containing the converted 16-bit half-precision 94/// float values. 95#define _mm256_cvtps_ph(a, imm) __extension__ ({ \ 96 (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)); }) 97 98/// \brief Converts a 128-bit vector containing 16-bit half-precision float 99/// values into a 256-bit vector of [8 x float]. 100/// 101/// \headerfile <x86intrin.h> 102/// 103/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction. 104/// 105/// \param __a 106/// A 128-bit vector containing 16-bit half-precision float values to be 107/// converted to 32-bit single-precision float values. 108/// \returns A vector of [8 x float] containing the converted 32-bit 109/// single-precision float values. 110static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c"))) 111_mm256_cvtph_ps(__m128i __a) 112{ 113 return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a); 114} 115#endif /* __AVX2__ */ 116 117#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) 118#include <bmiintrin.h> 119#endif 120 121#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__) 122#include <bmi2intrin.h> 123#endif 124 125#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__) 126#include <lzcntintrin.h> 127#endif 128 129#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__) 130#include <fmaintrin.h> 131#endif 132 133#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__) 134#include <avx512fintrin.h> 135#endif 136 137#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__) 138#include <avx512vlintrin.h> 139#endif 140 141#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__) 142#include <avx512bwintrin.h> 143#endif 144 145#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__) 146#include <avx512cdintrin.h> 147#endif 148 149#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__) 150#include <avx512dqintrin.h> 151#endif 152 153#if !defined(_MSC_VER) || __has_feature(modules) || \ 154 (defined(__AVX512VL__) && defined(__AVX512BW__)) 155#include <avx512vlbwintrin.h> 156#endif 157 158#if !defined(_MSC_VER) || __has_feature(modules) || \ 159 (defined(__AVX512VL__) && defined(__AVX512CD__)) 160#include <avx512vlcdintrin.h> 161#endif 162 163#if !defined(_MSC_VER) || __has_feature(modules) || \ 164 (defined(__AVX512VL__) && defined(__AVX512DQ__)) 165#include <avx512vldqintrin.h> 166#endif 167 168#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__) 169#include <avx512erintrin.h> 170#endif 171 172#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__) 173#include <avx512ifmaintrin.h> 174#endif 175 176#if !defined(_MSC_VER) || __has_feature(modules) || \ 177 (defined(__AVX512IFMA__) && defined(__AVX512VL__)) 178#include <avx512ifmavlintrin.h> 179#endif 180 181#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__) 182#include <avx512vbmiintrin.h> 183#endif 184 185#if !defined(_MSC_VER) || __has_feature(modules) || \ 186 (defined(__AVX512VBMI__) && defined(__AVX512VL__)) 187#include <avx512vbmivlintrin.h> 188#endif 189 190#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__) 191#include <avx512pfintrin.h> 192#endif 193 194#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__) 195#include <pkuintrin.h> 196#endif 197 198#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__) 199static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 200_rdrand16_step(unsigned short *__p) 201{ 202 return __builtin_ia32_rdrand16_step(__p); 203} 204 205static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 206_rdrand32_step(unsigned int *__p) 207{ 208 return __builtin_ia32_rdrand32_step(__p); 209} 210 211/* __bit_scan_forward */ 212static __inline__ int __attribute__((__always_inline__, __nodebug__)) 213_bit_scan_forward(int __A) { 214 return __builtin_ctz(__A); 215} 216 217/* __bit_scan_reverse */ 218static __inline__ int __attribute__((__always_inline__, __nodebug__)) 219_bit_scan_reverse(int __A) { 220 return 31 - __builtin_clz(__A); 221} 222 223#ifdef __x86_64__ 224static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 225_rdrand64_step(unsigned long long *__p) 226{ 227 return __builtin_ia32_rdrand64_step(__p); 228} 229#endif 230#endif /* __RDRND__ */ 231 232#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__) 233#ifdef __x86_64__ 234static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 235_readfsbase_u32(void) 236{ 237 return __builtin_ia32_rdfsbase32(); 238} 239 240static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 241_readfsbase_u64(void) 242{ 243 return __builtin_ia32_rdfsbase64(); 244} 245 246static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 247_readgsbase_u32(void) 248{ 249 return __builtin_ia32_rdgsbase32(); 250} 251 252static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 253_readgsbase_u64(void) 254{ 255 return __builtin_ia32_rdgsbase64(); 256} 257 258static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 259_writefsbase_u32(unsigned int __V) 260{ 261 return __builtin_ia32_wrfsbase32(__V); 262} 263 264static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 265_writefsbase_u64(unsigned long long __V) 266{ 267 return __builtin_ia32_wrfsbase64(__V); 268} 269 270static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 271_writegsbase_u32(unsigned int __V) 272{ 273 return __builtin_ia32_wrgsbase32(__V); 274} 275 276static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 277_writegsbase_u64(unsigned long long __V) 278{ 279 return __builtin_ia32_wrgsbase64(__V); 280} 281 282#endif 283#endif /* __FSGSBASE__ */ 284 285#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__) 286#include <rtmintrin.h> 287#include <xtestintrin.h> 288#endif 289 290#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__) 291#include <shaintrin.h> 292#endif 293 294#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__) 295#include <fxsrintrin.h> 296#endif 297 298#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__) 299#include <xsaveintrin.h> 300#endif 301 302#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__) 303#include <xsaveoptintrin.h> 304#endif 305 306#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__) 307#include <xsavecintrin.h> 308#endif 309 310#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__) 311#include <xsavesintrin.h> 312#endif 313 314/* Some intrinsics inside adxintrin.h are available only on processors with ADX, 315 * whereas others are also available at all times. */ 316#include <adxintrin.h> 317 318#endif /* __IMMINTRIN_H */ 319