/* avx512fintrin.h revision 321369 */
1296417Sdim/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------=== 2277325Sdim * 3277325Sdim * Permission is hereby granted, free of charge, to any person obtaining a copy 4277325Sdim * of this software and associated documentation files (the "Software"), to deal 5277325Sdim * in the Software without restriction, including without limitation the rights 6277325Sdim * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7277325Sdim * copies of the Software, and to permit persons to whom the Software is 8277325Sdim * furnished to do so, subject to the following conditions: 9277325Sdim * 10277325Sdim * The above copyright notice and this permission notice shall be included in 11277325Sdim * all copies or substantial portions of the Software. 12277325Sdim * 13277325Sdim * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14277325Sdim * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15277325Sdim * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16277325Sdim * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17277325Sdim * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18277325Sdim * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19277325Sdim * THE SOFTWARE. 20277325Sdim * 21277325Sdim *===-----------------------------------------------------------------------=== 22277325Sdim */ 23277325Sdim#ifndef __IMMINTRIN_H 24277325Sdim#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." 
25277325Sdim#endif 26277325Sdim 27277325Sdim#ifndef __AVX512FINTRIN_H 28277325Sdim#define __AVX512FINTRIN_H 29277325Sdim 30309124Sdimtypedef char __v64qi __attribute__((__vector_size__(64))); 31309124Sdimtypedef short __v32hi __attribute__((__vector_size__(64))); 32277325Sdimtypedef double __v8df __attribute__((__vector_size__(64))); 33277325Sdimtypedef float __v16sf __attribute__((__vector_size__(64))); 34277325Sdimtypedef long long __v8di __attribute__((__vector_size__(64))); 35277325Sdimtypedef int __v16si __attribute__((__vector_size__(64))); 36277325Sdim 37309124Sdim/* Unsigned types */ 38309124Sdimtypedef unsigned char __v64qu __attribute__((__vector_size__(64))); 39309124Sdimtypedef unsigned short __v32hu __attribute__((__vector_size__(64))); 40309124Sdimtypedef unsigned long long __v8du __attribute__((__vector_size__(64))); 41309124Sdimtypedef unsigned int __v16su __attribute__((__vector_size__(64))); 42309124Sdim 43277325Sdimtypedef float __m512 __attribute__((__vector_size__(64))); 44277325Sdimtypedef double __m512d __attribute__((__vector_size__(64))); 45277325Sdimtypedef long long __m512i __attribute__((__vector_size__(64))); 46277325Sdim 47277325Sdimtypedef unsigned char __mmask8; 48277325Sdimtypedef unsigned short __mmask16; 49277325Sdim 50277325Sdim/* Rounding mode macros. 
*/ 51277325Sdim#define _MM_FROUND_TO_NEAREST_INT 0x00 52277325Sdim#define _MM_FROUND_TO_NEG_INF 0x01 53277325Sdim#define _MM_FROUND_TO_POS_INF 0x02 54277325Sdim#define _MM_FROUND_TO_ZERO 0x03 55277325Sdim#define _MM_FROUND_CUR_DIRECTION 0x04 56277325Sdim 57314564Sdim/* Constants for integer comparison predicates */ 58314564Sdimtypedef enum { 59314564Sdim _MM_CMPINT_EQ, /* Equal */ 60314564Sdim _MM_CMPINT_LT, /* Less than */ 61314564Sdim _MM_CMPINT_LE, /* Less than or Equal */ 62314564Sdim _MM_CMPINT_UNUSED, 63314564Sdim _MM_CMPINT_NE, /* Not Equal */ 64314564Sdim _MM_CMPINT_NLT, /* Not Less than */ 65314564Sdim#define _MM_CMPINT_GE _MM_CMPINT_NLT /* Greater than or Equal */ 66314564Sdim _MM_CMPINT_NLE /* Not Less than or Equal */ 67314564Sdim#define _MM_CMPINT_GT _MM_CMPINT_NLE /* Greater than */ 68314564Sdim} _MM_CMPINT_ENUM; 69314564Sdim 70309124Sdimtypedef enum 71309124Sdim{ 72309124Sdim _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, 73309124Sdim _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, 74309124Sdim _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08, 75309124Sdim _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B, 76309124Sdim _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, 77309124Sdim _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, 78309124Sdim _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14, 79309124Sdim _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17, 80309124Sdim _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, 81309124Sdim _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, 82309124Sdim _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20, 83309124Sdim _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23, 84309124Sdim _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, 85309124Sdim _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, 
86309124Sdim _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C, 87309124Sdim _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F, 88309124Sdim _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, 89309124Sdim _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, 90309124Sdim _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38, 91309124Sdim _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B, 92309124Sdim _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, 93309124Sdim _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, 94309124Sdim _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44, 95309124Sdim _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47, 96309124Sdim _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, 97309124Sdim _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, 98309124Sdim _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50, 99309124Sdim _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53, 100309124Sdim _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, 101309124Sdim _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, 102309124Sdim _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C, 103309124Sdim _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F, 104309124Sdim _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, 105309124Sdim _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, 106309124Sdim _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68, 107309124Sdim _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B, 108309124Sdim _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, 109309124Sdim _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, 110309124Sdim _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74, 111309124Sdim 
_MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77, 112309124Sdim _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, 113309124Sdim _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, 114309124Sdim _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80, 115309124Sdim _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83, 116309124Sdim _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, 117309124Sdim _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, 118309124Sdim _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C, 119309124Sdim _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F, 120309124Sdim _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, 121309124Sdim _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, 122309124Sdim _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98, 123309124Sdim _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B, 124309124Sdim _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, 125309124Sdim _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, 126309124Sdim _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4, 127309124Sdim _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7, 128309124Sdim _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, 129309124Sdim _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, 130309124Sdim _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0, 131309124Sdim _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3, 132309124Sdim _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, 133309124Sdim _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, 134309124Sdim _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC, 135309124Sdim _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF, 136309124Sdim 
_MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, 137309124Sdim _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, 138309124Sdim _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8, 139309124Sdim _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB, 140309124Sdim _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, 141309124Sdim _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, 142309124Sdim _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4, 143309124Sdim _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7, 144309124Sdim _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, 145309124Sdim _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, 146309124Sdim _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0, 147309124Sdim _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3, 148309124Sdim _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, 149309124Sdim _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, 150309124Sdim _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC, 151309124Sdim _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF, 152309124Sdim _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, 153309124Sdim _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, 154309124Sdim _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8, 155309124Sdim _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB, 156309124Sdim _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, 157309124Sdim _MM_PERM_DDDD = 0xFF 158309124Sdim} _MM_PERM_ENUM; 159309124Sdim 160309124Sdimtypedef enum 161309124Sdim{ 162309124Sdim _MM_MANT_NORM_1_2, /* interval [1, 2) */ 163309124Sdim _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */ 164309124Sdim _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */ 165309124Sdim _MM_MANT_NORM_p75_1p5 /* 
interval [0.75, 1.5) */ 166309124Sdim} _MM_MANTISSA_NORM_ENUM; 167309124Sdim 168309124Sdimtypedef enum 169309124Sdim{ 170309124Sdim _MM_MANT_SIGN_src, /* sign = sign(SRC) */ 171309124Sdim _MM_MANT_SIGN_zero, /* sign = 0 */ 172309124Sdim _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */ 173309124Sdim} _MM_MANTISSA_SIGN_ENUM; 174309124Sdim 175288943Sdim/* Define the default attributes for the functions in this file. */ 176296417Sdim#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) 177288943Sdim 178277325Sdim/* Create vectors with repeated elements */ 179277325Sdim 180288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 181277325Sdim_mm512_setzero_si512(void) 182277325Sdim{ 183277325Sdim return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; 184277325Sdim} 185277325Sdim 186309124Sdim#define _mm512_setzero_epi32 _mm512_setzero_si512 187309124Sdim 188296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 189309124Sdim_mm512_undefined_pd(void) 190296417Sdim{ 191296417Sdim return (__m512d)__builtin_ia32_undef512(); 192296417Sdim} 193296417Sdim 194296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 195309124Sdim_mm512_undefined(void) 196296417Sdim{ 197296417Sdim return (__m512)__builtin_ia32_undef512(); 198296417Sdim} 199296417Sdim 200296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 201309124Sdim_mm512_undefined_ps(void) 202296417Sdim{ 203296417Sdim return (__m512)__builtin_ia32_undef512(); 204296417Sdim} 205296417Sdim 206296417Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 207309124Sdim_mm512_undefined_epi32(void) 208296417Sdim{ 209296417Sdim return (__m512i)__builtin_ia32_undef512(); 210296417Sdim} 211296417Sdim 212309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 213309124Sdim_mm512_broadcastd_epi32 (__m128i __A) 214309124Sdim{ 215309124Sdim return (__m512i)__builtin_shufflevector((__v4si) __A, 216309124Sdim (__v4si)_mm_undefined_si128(), 217309124Sdim 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 
218309124Sdim} 219309124Sdim 220309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 221309124Sdim_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) 222309124Sdim{ 223309124Sdim return (__m512i)__builtin_ia32_selectd_512(__M, 224309124Sdim (__v16si) _mm512_broadcastd_epi32(__A), 225309124Sdim (__v16si) __O); 226309124Sdim} 227309124Sdim 228309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 229309124Sdim_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) 230309124Sdim{ 231309124Sdim return (__m512i)__builtin_ia32_selectd_512(__M, 232309124Sdim (__v16si) _mm512_broadcastd_epi32(__A), 233309124Sdim (__v16si) _mm512_setzero_si512()); 234309124Sdim} 235309124Sdim 236309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 237309124Sdim_mm512_broadcastq_epi64 (__m128i __A) 238309124Sdim{ 239309124Sdim return (__m512i)__builtin_shufflevector((__v2di) __A, 240309124Sdim (__v2di) _mm_undefined_si128(), 241309124Sdim 0, 0, 0, 0, 0, 0, 0, 0); 242309124Sdim} 243309124Sdim 244309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 245309124Sdim_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) 246309124Sdim{ 247309124Sdim return (__m512i)__builtin_ia32_selectq_512(__M, 248309124Sdim (__v8di) _mm512_broadcastq_epi64(__A), 249309124Sdim (__v8di) __O); 250309124Sdim 251309124Sdim} 252309124Sdim 253309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 254309124Sdim_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 255309124Sdim{ 256309124Sdim return (__m512i)__builtin_ia32_selectq_512(__M, 257309124Sdim (__v8di) _mm512_broadcastq_epi64(__A), 258309124Sdim (__v8di) _mm512_setzero_si512()); 259309124Sdim} 260309124Sdim 261288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 262277325Sdim_mm512_maskz_set1_epi32(__mmask16 __M, int __A) 263277325Sdim{ 264277325Sdim return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, 265277325Sdim (__v16si) 266277325Sdim _mm512_setzero_si512 (), 267277325Sdim __M); 268277325Sdim} 
269277325Sdim 270288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 271277325Sdim_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) 272277325Sdim{ 273277325Sdim#ifdef __x86_64__ 274277325Sdim return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, 275277325Sdim (__v8di) 276277325Sdim _mm512_setzero_si512 (), 277277325Sdim __M); 278277325Sdim#else 279277325Sdim return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, 280277325Sdim (__v8di) 281277325Sdim _mm512_setzero_si512 (), 282277325Sdim __M); 283277325Sdim#endif 284277325Sdim} 285277325Sdim 286288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 287277325Sdim_mm512_setzero_ps(void) 288277325Sdim{ 289277325Sdim return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 290277325Sdim 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 291277325Sdim} 292309124Sdim 293309124Sdim#define _mm512_setzero _mm512_setzero_ps 294309124Sdim 295288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 296277325Sdim_mm512_setzero_pd(void) 297277325Sdim{ 298277325Sdim return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 299277325Sdim} 300277325Sdim 301288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 302277325Sdim_mm512_set1_ps(float __w) 303277325Sdim{ 304277325Sdim return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, 305277325Sdim __w, __w, __w, __w, __w, __w, __w, __w }; 306277325Sdim} 307277325Sdim 308288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 309277325Sdim_mm512_set1_pd(double __w) 310277325Sdim{ 311277325Sdim return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; 312277325Sdim} 313277325Sdim 314288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 315309124Sdim_mm512_set1_epi8(char __w) 316309124Sdim{ 317309124Sdim return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w, 318309124Sdim __w, __w, __w, __w, __w, __w, __w, __w, 319309124Sdim __w, __w, __w, __w, __w, __w, __w, __w, 320309124Sdim __w, __w, __w, __w, __w, __w, __w, __w, 321309124Sdim __w, __w, __w, __w, __w, __w, __w, __w, 
322309124Sdim __w, __w, __w, __w, __w, __w, __w, __w, 323309124Sdim __w, __w, __w, __w, __w, __w, __w, __w, 324309124Sdim __w, __w, __w, __w, __w, __w, __w, __w }; 325309124Sdim} 326309124Sdim 327309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 328309124Sdim_mm512_set1_epi16(short __w) 329309124Sdim{ 330309124Sdim return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w, 331309124Sdim __w, __w, __w, __w, __w, __w, __w, __w, 332309124Sdim __w, __w, __w, __w, __w, __w, __w, __w, 333309124Sdim __w, __w, __w, __w, __w, __w, __w, __w }; 334309124Sdim} 335309124Sdim 336309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 337277325Sdim_mm512_set1_epi32(int __s) 338277325Sdim{ 339277325Sdim return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s, 340277325Sdim __s, __s, __s, __s, __s, __s, __s, __s }; 341277325Sdim} 342277325Sdim 343288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 344277325Sdim_mm512_set1_epi64(long long __d) 345277325Sdim{ 346277325Sdim return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; 347277325Sdim} 348277325Sdim 349288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 350309124Sdim_mm512_broadcastss_ps(__m128 __A) 351277325Sdim{ 352309124Sdim return (__m512)__builtin_shufflevector((__v4sf) __A, 353309124Sdim (__v4sf)_mm_undefined_ps(), 354309124Sdim 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 355277325Sdim} 356277325Sdim 357309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 358309124Sdim_mm512_set4_epi32 (int __A, int __B, int __C, int __D) 359309124Sdim{ 360309124Sdim return (__m512i)(__v16si) 361309124Sdim { __D, __C, __B, __A, __D, __C, __B, __A, 362309124Sdim __D, __C, __B, __A, __D, __C, __B, __A }; 363309124Sdim} 364309124Sdim 365309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 366309124Sdim_mm512_set4_epi64 (long long __A, long long __B, long long __C, 367309124Sdim long long __D) 368309124Sdim{ 369309124Sdim return (__m512i) (__v8di) 370309124Sdim { __D, __C, __B, __A, __D, __C, __B, __A }; 
371309124Sdim} 372309124Sdim 373309124Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 374309124Sdim_mm512_set4_pd (double __A, double __B, double __C, double __D) 375309124Sdim{ 376309124Sdim return (__m512d) 377309124Sdim { __D, __C, __B, __A, __D, __C, __B, __A }; 378309124Sdim} 379309124Sdim 380309124Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 381309124Sdim_mm512_set4_ps (float __A, float __B, float __C, float __D) 382309124Sdim{ 383309124Sdim return (__m512) 384309124Sdim { __D, __C, __B, __A, __D, __C, __B, __A, 385309124Sdim __D, __C, __B, __A, __D, __C, __B, __A }; 386309124Sdim} 387309124Sdim 388309124Sdim#define _mm512_setr4_epi32(e0,e1,e2,e3) \ 389309124Sdim _mm512_set4_epi32((e3),(e2),(e1),(e0)) 390309124Sdim 391309124Sdim#define _mm512_setr4_epi64(e0,e1,e2,e3) \ 392309124Sdim _mm512_set4_epi64((e3),(e2),(e1),(e0)) 393309124Sdim 394309124Sdim#define _mm512_setr4_pd(e0,e1,e2,e3) \ 395309124Sdim _mm512_set4_pd((e3),(e2),(e1),(e0)) 396309124Sdim 397309124Sdim#define _mm512_setr4_ps(e0,e1,e2,e3) \ 398309124Sdim _mm512_set4_ps((e3),(e2),(e1),(e0)) 399309124Sdim 400288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 401309124Sdim_mm512_broadcastsd_pd(__m128d __A) 402277325Sdim{ 403309124Sdim return (__m512d)__builtin_shufflevector((__v2df) __A, 404309124Sdim (__v2df) _mm_undefined_pd(), 405309124Sdim 0, 0, 0, 0, 0, 0, 0, 0); 406277325Sdim} 407277325Sdim 408277325Sdim/* Cast between vector types */ 409277325Sdim 410288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 411277325Sdim_mm512_castpd256_pd512(__m256d __a) 412277325Sdim{ 413277325Sdim return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); 414277325Sdim} 415277325Sdim 416288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 417277325Sdim_mm512_castps256_ps512(__m256 __a) 418277325Sdim{ 419277325Sdim return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 420277325Sdim -1, -1, -1, -1, -1, -1, -1, -1); 421277325Sdim} 422277325Sdim 423288943Sdimstatic __inline __m128d 
__DEFAULT_FN_ATTRS 424277325Sdim_mm512_castpd512_pd128(__m512d __a) 425277325Sdim{ 426277325Sdim return __builtin_shufflevector(__a, __a, 0, 1); 427277325Sdim} 428277325Sdim 429309124Sdimstatic __inline __m256d __DEFAULT_FN_ATTRS 430309124Sdim_mm512_castpd512_pd256 (__m512d __A) 431309124Sdim{ 432309124Sdim return __builtin_shufflevector(__A, __A, 0, 1, 2, 3); 433309124Sdim} 434309124Sdim 435288943Sdimstatic __inline __m128 __DEFAULT_FN_ATTRS 436277325Sdim_mm512_castps512_ps128(__m512 __a) 437277325Sdim{ 438277325Sdim return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); 439277325Sdim} 440277325Sdim 441309124Sdimstatic __inline __m256 __DEFAULT_FN_ATTRS 442309124Sdim_mm512_castps512_ps256 (__m512 __A) 443309124Sdim{ 444309124Sdim return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7); 445309124Sdim} 446309124Sdim 447309124Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 448309124Sdim_mm512_castpd_ps (__m512d __A) 449309124Sdim{ 450309124Sdim return (__m512) (__A); 451309124Sdim} 452309124Sdim 453309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 454309124Sdim_mm512_castpd_si512 (__m512d __A) 455309124Sdim{ 456309124Sdim return (__m512i) (__A); 457309124Sdim} 458309124Sdim 459309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 460309124Sdim_mm512_castpd128_pd512 (__m128d __A) 461309124Sdim{ 462309124Sdim return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); 463309124Sdim} 464309124Sdim 465309124Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 466309124Sdim_mm512_castps_pd (__m512 __A) 467309124Sdim{ 468309124Sdim return (__m512d) (__A); 469309124Sdim} 470309124Sdim 471309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 472309124Sdim_mm512_castps_si512 (__m512 __A) 473309124Sdim{ 474309124Sdim return (__m512i) (__A); 475309124Sdim} 476309124Sdim 477309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 478309124Sdim_mm512_castps128_ps512 (__m128 __A) 479309124Sdim{ 480309124Sdim return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); 481309124Sdim} 482309124Sdim 483309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 484309124Sdim_mm512_castsi128_si512 (__m128i __A) 485309124Sdim{ 486309124Sdim return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); 487309124Sdim} 488309124Sdim 489309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 490309124Sdim_mm512_castsi256_si512 (__m256i __A) 491309124Sdim{ 492309124Sdim return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1); 493309124Sdim} 494309124Sdim 495309124Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 496309124Sdim_mm512_castsi512_ps (__m512i __A) 497309124Sdim{ 498309124Sdim return (__m512) (__A); 499309124Sdim} 500309124Sdim 501309124Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 502309124Sdim_mm512_castsi512_pd (__m512i __A) 503309124Sdim{ 504309124Sdim return (__m512d) (__A); 505309124Sdim} 506309124Sdim 507309124Sdimstatic __inline __m128i __DEFAULT_FN_ATTRS 508309124Sdim_mm512_castsi512_si128 (__m512i __A) 509309124Sdim{ 510309124Sdim return (__m128i)__builtin_shufflevector(__A, __A , 0, 1); 511309124Sdim} 512309124Sdim 513309124Sdimstatic __inline __m256i __DEFAULT_FN_ATTRS 514309124Sdim_mm512_castsi512_si256 (__m512i __A) 515309124Sdim{ 516309124Sdim return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3); 517309124Sdim} 518309124Sdim 519314564Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 520314564Sdim_mm512_int2mask(int __a) 521314564Sdim{ 522314564Sdim return (__mmask16)__a; 523314564Sdim} 524314564Sdim 525314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 526314564Sdim_mm512_mask2int(__mmask16 __a) 527314564Sdim{ 528314564Sdim return (int)__a; 529314564Sdim} 530314564Sdim 531321369Sdim/// \brief Constructs a 512-bit floating-point vector of [8 x double] from a 532321369Sdim/// 128-bit floating-point vector of [2 x double]. The lower 128 bits 533321369Sdim/// contain the value of the source vector. 
The upper 384 bits are set 534321369Sdim/// to zero. 535321369Sdim/// 536321369Sdim/// \headerfile <x86intrin.h> 537321369Sdim/// 538321369Sdim/// This intrinsic has no corresponding instruction. 539321369Sdim/// 540321369Sdim/// \param __a 541321369Sdim/// A 128-bit vector of [2 x double]. 542321369Sdim/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits 543321369Sdim/// contain the value of the parameter. The upper 384 bits are set to zero. 544321369Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 545321369Sdim_mm512_zextpd128_pd512(__m128d __a) 546321369Sdim{ 547321369Sdim return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3); 548321369Sdim} 549321369Sdim 550321369Sdim/// \brief Constructs a 512-bit floating-point vector of [8 x double] from a 551321369Sdim/// 256-bit floating-point vector of [4 x double]. The lower 256 bits 552321369Sdim/// contain the value of the source vector. The upper 256 bits are set 553321369Sdim/// to zero. 554321369Sdim/// 555321369Sdim/// \headerfile <x86intrin.h> 556321369Sdim/// 557321369Sdim/// This intrinsic has no corresponding instruction. 558321369Sdim/// 559321369Sdim/// \param __a 560321369Sdim/// A 256-bit vector of [4 x double]. 561321369Sdim/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits 562321369Sdim/// contain the value of the parameter. The upper 256 bits are set to zero. 563321369Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 564321369Sdim_mm512_zextpd256_pd512(__m256d __a) 565321369Sdim{ 566321369Sdim return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7); 567321369Sdim} 568321369Sdim 569321369Sdim/// \brief Constructs a 512-bit floating-point vector of [16 x float] from a 570321369Sdim/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain 571321369Sdim/// the value of the source vector. The upper 384 bits are set to zero. 
572321369Sdim/// 573321369Sdim/// \headerfile <x86intrin.h> 574321369Sdim/// 575321369Sdim/// This intrinsic has no corresponding instruction. 576321369Sdim/// 577321369Sdim/// \param __a 578321369Sdim/// A 128-bit vector of [4 x float]. 579321369Sdim/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits 580321369Sdim/// contain the value of the parameter. The upper 384 bits are set to zero. 581321369Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 582321369Sdim_mm512_zextps128_ps512(__m128 __a) 583321369Sdim{ 584321369Sdim return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7); 585321369Sdim} 586321369Sdim 587321369Sdim/// \brief Constructs a 512-bit floating-point vector of [16 x float] from a 588321369Sdim/// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain 589321369Sdim/// the value of the source vector. The upper 256 bits are set to zero. 590321369Sdim/// 591321369Sdim/// \headerfile <x86intrin.h> 592321369Sdim/// 593321369Sdim/// This intrinsic has no corresponding instruction. 594321369Sdim/// 595321369Sdim/// \param __a 596321369Sdim/// A 256-bit vector of [8 x float]. 597321369Sdim/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits 598321369Sdim/// contain the value of the parameter. The upper 256 bits are set to zero. 599321369Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 600321369Sdim_mm512_zextps256_ps512(__m256 __a) 601321369Sdim{ 602321369Sdim return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 603321369Sdim} 604321369Sdim 605321369Sdim/// \brief Constructs a 512-bit integer vector from a 128-bit integer vector. 606321369Sdim/// The lower 128 bits contain the value of the source vector. The upper 607321369Sdim/// 384 bits are set to zero. 
608321369Sdim/// 609321369Sdim/// \headerfile <x86intrin.h> 610321369Sdim/// 611321369Sdim/// This intrinsic has no corresponding instruction. 612321369Sdim/// 613321369Sdim/// \param __a 614321369Sdim/// A 128-bit integer vector. 615321369Sdim/// \returns A 512-bit integer vector. The lower 128 bits contain the value of 616321369Sdim/// the parameter. The upper 384 bits are set to zero. 617321369Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 618321369Sdim_mm512_zextsi128_si512(__m128i __a) 619321369Sdim{ 620321369Sdim return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3); 621321369Sdim} 622321369Sdim 623321369Sdim/// \brief Constructs a 512-bit integer vector from a 256-bit integer vector. 624321369Sdim/// The lower 256 bits contain the value of the source vector. The upper 625321369Sdim/// 256 bits are set to zero. 626321369Sdim/// 627321369Sdim/// \headerfile <x86intrin.h> 628321369Sdim/// 629321369Sdim/// This intrinsic has no corresponding instruction. 630321369Sdim/// 631321369Sdim/// \param __a 632321369Sdim/// A 256-bit integer vector. 633321369Sdim/// \returns A 512-bit integer vector. The lower 256 bits contain the value of 634321369Sdim/// the parameter. The upper 256 bits are set to zero. 
635321369Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 636321369Sdim_mm512_zextsi256_si512(__m256i __a) 637321369Sdim{ 638321369Sdim return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7); 639321369Sdim} 640321369Sdim 641288943Sdim/* Bitwise operators */ 642288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 643288943Sdim_mm512_and_epi32(__m512i __a, __m512i __b) 644288943Sdim{ 645309124Sdim return (__m512i)((__v16su)__a & (__v16su)__b); 646288943Sdim} 647288943Sdim 648288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 649288943Sdim_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 650288943Sdim{ 651309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, 652309124Sdim (__v16si) _mm512_and_epi32(__a, __b), 653309124Sdim (__v16si) __src); 654288943Sdim} 655309124Sdim 656288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 657288943Sdim_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) 658288943Sdim{ 659309124Sdim return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (), 660309124Sdim __k, __a, __b); 661288943Sdim} 662288943Sdim 663288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 664288943Sdim_mm512_and_epi64(__m512i __a, __m512i __b) 665288943Sdim{ 666309124Sdim return (__m512i)((__v8du)__a & (__v8du)__b); 667288943Sdim} 668288943Sdim 669288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 670288943Sdim_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 671288943Sdim{ 672309124Sdim return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k, 673309124Sdim (__v8di) _mm512_and_epi64(__a, __b), 674309124Sdim (__v8di) __src); 675288943Sdim} 676309124Sdim 677288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 678288943Sdim_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) 679288943Sdim{ 680309124Sdim return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (), 681309124Sdim __k, __a, __b); 682288943Sdim} 
683288943Sdim 684288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 685309124Sdim_mm512_andnot_si512 (__m512i __A, __m512i __B) 686309124Sdim{ 687309124Sdim return (__m512i)(~(__v8du)(__A) & (__v8du)__B); 688309124Sdim} 689309124Sdim 690309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 691288943Sdim_mm512_andnot_epi32 (__m512i __A, __m512i __B) 692288943Sdim{ 693309124Sdim return (__m512i)(~(__v16su)(__A) & (__v16su)__B); 694288943Sdim} 695288943Sdim 696288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 697309124Sdim_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 698288943Sdim{ 699309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 700309124Sdim (__v16si)_mm512_andnot_epi32(__A, __B), 701309124Sdim (__v16si)__W); 702288943Sdim} 703288943Sdim 704288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 705309124Sdim_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B) 706288943Sdim{ 707309124Sdim return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(), 708309124Sdim __U, __A, __B); 709288943Sdim} 710288943Sdim 711288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 712309124Sdim_mm512_andnot_epi64(__m512i __A, __m512i __B) 713288943Sdim{ 714309124Sdim return (__m512i)(~(__v8du)(__A) & (__v8du)__B); 715288943Sdim} 716288943Sdim 717288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 718309124Sdim_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 719288943Sdim{ 720309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 721309124Sdim (__v8di)_mm512_andnot_epi64(__A, __B), 722309124Sdim (__v8di)__W); 723288943Sdim} 724288943Sdim 725288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 726309124Sdim_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B) 727288943Sdim{ 728309124Sdim return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(), 729309124Sdim __U, __A, __B); 730288943Sdim} 731309124Sdim 732288943Sdimstatic __inline__ 
__m512i __DEFAULT_FN_ATTRS 733288943Sdim_mm512_or_epi32(__m512i __a, __m512i __b) 734288943Sdim{ 735309124Sdim return (__m512i)((__v16su)__a | (__v16su)__b); 736288943Sdim} 737288943Sdim 738288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 739288943Sdim_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 740288943Sdim{ 741309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, 742309124Sdim (__v16si)_mm512_or_epi32(__a, __b), 743309124Sdim (__v16si)__src); 744288943Sdim} 745309124Sdim 746288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 747288943Sdim_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) 748288943Sdim{ 749309124Sdim return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b); 750288943Sdim} 751288943Sdim 752288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 753288943Sdim_mm512_or_epi64(__m512i __a, __m512i __b) 754288943Sdim{ 755309124Sdim return (__m512i)((__v8du)__a | (__v8du)__b); 756288943Sdim} 757288943Sdim 758288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 759288943Sdim_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 760288943Sdim{ 761309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k, 762309124Sdim (__v8di)_mm512_or_epi64(__a, __b), 763309124Sdim (__v8di)__src); 764288943Sdim} 765309124Sdim 766288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 767288943Sdim_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) 768288943Sdim{ 769309124Sdim return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b); 770288943Sdim} 771288943Sdim 772288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 773288943Sdim_mm512_xor_epi32(__m512i __a, __m512i __b) 774288943Sdim{ 775309124Sdim return (__m512i)((__v16su)__a ^ (__v16su)__b); 776288943Sdim} 777288943Sdim 778288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 779288943Sdim_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 
780288943Sdim{ 781309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, 782309124Sdim (__v16si)_mm512_xor_epi32(__a, __b), 783309124Sdim (__v16si)__src); 784288943Sdim} 785309124Sdim 786288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 787288943Sdim_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) 788288943Sdim{ 789309124Sdim return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b); 790288943Sdim} 791288943Sdim 792288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 793288943Sdim_mm512_xor_epi64(__m512i __a, __m512i __b) 794288943Sdim{ 795309124Sdim return (__m512i)((__v8du)__a ^ (__v8du)__b); 796288943Sdim} 797288943Sdim 798288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 799288943Sdim_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 800288943Sdim{ 801309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k, 802309124Sdim (__v8di)_mm512_xor_epi64(__a, __b), 803309124Sdim (__v8di)__src); 804288943Sdim} 805309124Sdim 806288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 807288943Sdim_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) 808288943Sdim{ 809309124Sdim return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b); 810288943Sdim} 811288943Sdim 812288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 813288943Sdim_mm512_and_si512(__m512i __a, __m512i __b) 814288943Sdim{ 815309124Sdim return (__m512i)((__v8du)__a & (__v8du)__b); 816288943Sdim} 817288943Sdim 818288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 819288943Sdim_mm512_or_si512(__m512i __a, __m512i __b) 820288943Sdim{ 821309124Sdim return (__m512i)((__v8du)__a | (__v8du)__b); 822288943Sdim} 823288943Sdim 824288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 825288943Sdim_mm512_xor_si512(__m512i __a, __m512i __b) 826288943Sdim{ 827309124Sdim return (__m512i)((__v8du)__a ^ (__v8du)__b); 828288943Sdim} 829309124Sdim 830277325Sdim/* Arithmetic */ 831277325Sdim 
832288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 833277325Sdim_mm512_add_pd(__m512d __a, __m512d __b) 834277325Sdim{ 835309124Sdim return (__m512d)((__v8df)__a + (__v8df)__b); 836277325Sdim} 837277325Sdim 838288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 839277325Sdim_mm512_add_ps(__m512 __a, __m512 __b) 840277325Sdim{ 841309124Sdim return (__m512)((__v16sf)__a + (__v16sf)__b); 842277325Sdim} 843277325Sdim 844288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 845277325Sdim_mm512_mul_pd(__m512d __a, __m512d __b) 846277325Sdim{ 847309124Sdim return (__m512d)((__v8df)__a * (__v8df)__b); 848277325Sdim} 849277325Sdim 850288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 851277325Sdim_mm512_mul_ps(__m512 __a, __m512 __b) 852277325Sdim{ 853309124Sdim return (__m512)((__v16sf)__a * (__v16sf)__b); 854277325Sdim} 855277325Sdim 856288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 857277325Sdim_mm512_sub_pd(__m512d __a, __m512d __b) 858277325Sdim{ 859309124Sdim return (__m512d)((__v8df)__a - (__v8df)__b); 860277325Sdim} 861277325Sdim 862288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 863277325Sdim_mm512_sub_ps(__m512 __a, __m512 __b) 864277325Sdim{ 865309124Sdim return (__m512)((__v16sf)__a - (__v16sf)__b); 866277325Sdim} 867277325Sdim 868288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 869288943Sdim_mm512_add_epi64 (__m512i __A, __m512i __B) 870288943Sdim{ 871309124Sdim return (__m512i) ((__v8du) __A + (__v8du) __B); 872288943Sdim} 873288943Sdim 874288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 875314564Sdim_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 876288943Sdim{ 877314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 878314564Sdim (__v8di)_mm512_add_epi64(__A, __B), 879314564Sdim (__v8di)__W); 880288943Sdim} 881288943Sdim 882288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 883314564Sdim_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) 884288943Sdim{ 885314564Sdim return 
(__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 886314564Sdim (__v8di)_mm512_add_epi64(__A, __B), 887314564Sdim (__v8di)_mm512_setzero_si512()); 888288943Sdim} 889288943Sdim 890288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 891288943Sdim_mm512_sub_epi64 (__m512i __A, __m512i __B) 892288943Sdim{ 893309124Sdim return (__m512i) ((__v8du) __A - (__v8du) __B); 894288943Sdim} 895288943Sdim 896288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 897314564Sdim_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 898288943Sdim{ 899314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 900314564Sdim (__v8di)_mm512_sub_epi64(__A, __B), 901314564Sdim (__v8di)__W); 902288943Sdim} 903288943Sdim 904288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 905314564Sdim_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) 906288943Sdim{ 907314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 908314564Sdim (__v8di)_mm512_sub_epi64(__A, __B), 909314564Sdim (__v8di)_mm512_setzero_si512()); 910288943Sdim} 911288943Sdim 912288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 913288943Sdim_mm512_add_epi32 (__m512i __A, __m512i __B) 914288943Sdim{ 915309124Sdim return (__m512i) ((__v16su) __A + (__v16su) __B); 916288943Sdim} 917288943Sdim 918288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 919314564Sdim_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 920288943Sdim{ 921314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 922314564Sdim (__v16si)_mm512_add_epi32(__A, __B), 923314564Sdim (__v16si)__W); 924288943Sdim} 925288943Sdim 926288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 927288943Sdim_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 928288943Sdim{ 929314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 930314564Sdim (__v16si)_mm512_add_epi32(__A, __B), 931314564Sdim (__v16si)_mm512_setzero_si512()); 932288943Sdim} 
933288943Sdim 934288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 935288943Sdim_mm512_sub_epi32 (__m512i __A, __m512i __B) 936288943Sdim{ 937309124Sdim return (__m512i) ((__v16su) __A - (__v16su) __B); 938288943Sdim} 939288943Sdim 940288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 941314564Sdim_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 942288943Sdim{ 943314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 944314564Sdim (__v16si)_mm512_sub_epi32(__A, __B), 945314564Sdim (__v16si)__W); 946288943Sdim} 947288943Sdim 948288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 949314564Sdim_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) 950288943Sdim{ 951314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 952314564Sdim (__v16si)_mm512_sub_epi32(__A, __B), 953314564Sdim (__v16si)_mm512_setzero_si512()); 954288943Sdim} 955288943Sdim 956309124Sdim#define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \ 957309124Sdim (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \ 958309124Sdim (__v8df)(__m512d)(B), \ 959309124Sdim (__v8df)(__m512d)(W), (__mmask8)(U), \ 960309124Sdim (int)(R)); }) 961309124Sdim 962309124Sdim#define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \ 963309124Sdim (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \ 964309124Sdim (__v8df)(__m512d)(B), \ 965309124Sdim (__v8df)_mm512_setzero_pd(), \ 966309124Sdim (__mmask8)(U), (int)(R)); }) 967309124Sdim 968309124Sdim#define _mm512_max_round_pd(A, B, R) __extension__ ({ \ 969309124Sdim (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \ 970309124Sdim (__v8df)(__m512d)(B), \ 971309124Sdim (__v8df)_mm512_undefined_pd(), \ 972309124Sdim (__mmask8)-1, (int)(R)); }) 973309124Sdim 974288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 975277325Sdim_mm512_max_pd(__m512d __A, __m512d __B) 976277325Sdim{ 977277325Sdim return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) 
__A, 978277325Sdim (__v8df) __B, 979277325Sdim (__v8df) 980277325Sdim _mm512_setzero_pd (), 981277325Sdim (__mmask8) -1, 982277325Sdim _MM_FROUND_CUR_DIRECTION); 983277325Sdim} 984277325Sdim 985309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 986309124Sdim_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 987309124Sdim{ 988309124Sdim return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 989309124Sdim (__v8df) __B, 990309124Sdim (__v8df) __W, 991309124Sdim (__mmask8) __U, 992309124Sdim _MM_FROUND_CUR_DIRECTION); 993309124Sdim} 994309124Sdim 995309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 996309124Sdim_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) 997309124Sdim{ 998309124Sdim return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 999309124Sdim (__v8df) __B, 1000309124Sdim (__v8df) 1001309124Sdim _mm512_setzero_pd (), 1002309124Sdim (__mmask8) __U, 1003309124Sdim _MM_FROUND_CUR_DIRECTION); 1004309124Sdim} 1005309124Sdim 1006309124Sdim#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \ 1007309124Sdim (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \ 1008309124Sdim (__v16sf)(__m512)(B), \ 1009309124Sdim (__v16sf)(__m512)(W), (__mmask16)(U), \ 1010309124Sdim (int)(R)); }) 1011309124Sdim 1012309124Sdim#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \ 1013309124Sdim (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \ 1014309124Sdim (__v16sf)(__m512)(B), \ 1015309124Sdim (__v16sf)_mm512_setzero_ps(), \ 1016309124Sdim (__mmask16)(U), (int)(R)); }) 1017309124Sdim 1018309124Sdim#define _mm512_max_round_ps(A, B, R) __extension__ ({ \ 1019309124Sdim (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \ 1020309124Sdim (__v16sf)(__m512)(B), \ 1021309124Sdim (__v16sf)_mm512_undefined_ps(), \ 1022309124Sdim (__mmask16)-1, (int)(R)); }) 1023309124Sdim 1024288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1025277325Sdim_mm512_max_ps(__m512 __A, __m512 __B) 
1026277325Sdim{ 1027277325Sdim return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 1028277325Sdim (__v16sf) __B, 1029277325Sdim (__v16sf) 1030277325Sdim _mm512_setzero_ps (), 1031277325Sdim (__mmask16) -1, 1032277325Sdim _MM_FROUND_CUR_DIRECTION); 1033277325Sdim} 1034277325Sdim 1035309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1036309124Sdim_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 1037309124Sdim{ 1038309124Sdim return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 1039309124Sdim (__v16sf) __B, 1040309124Sdim (__v16sf) __W, 1041309124Sdim (__mmask16) __U, 1042309124Sdim _MM_FROUND_CUR_DIRECTION); 1043309124Sdim} 1044309124Sdim 1045309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1046309124Sdim_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) 1047309124Sdim{ 1048309124Sdim return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 1049309124Sdim (__v16sf) __B, 1050309124Sdim (__v16sf) 1051309124Sdim _mm512_setzero_ps (), 1052309124Sdim (__mmask16) __U, 1053309124Sdim _MM_FROUND_CUR_DIRECTION); 1054309124Sdim} 1055309124Sdim 1056296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1057296417Sdim_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 1058309124Sdim return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, 1059296417Sdim (__v4sf) __B, 1060296417Sdim (__v4sf) __W, 1061296417Sdim (__mmask8) __U, 1062296417Sdim _MM_FROUND_CUR_DIRECTION); 1063296417Sdim} 1064296417Sdim 1065296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1066296417Sdim_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { 1067309124Sdim return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, 1068296417Sdim (__v4sf) __B, 1069296417Sdim (__v4sf) _mm_setzero_ps (), 1070296417Sdim (__mmask8) __U, 1071296417Sdim _MM_FROUND_CUR_DIRECTION); 1072296417Sdim} 1073296417Sdim 1074309124Sdim#define _mm_max_round_ss(A, B, R) __extension__ ({ \ 1075309124Sdim 
(__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 1076309124Sdim (__v4sf)(__m128)(B), \ 1077309124Sdim (__v4sf)_mm_setzero_ps(), \ 1078309124Sdim (__mmask8)-1, (int)(R)); }) 1079296417Sdim 1080309124Sdim#define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \ 1081309124Sdim (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 1082309124Sdim (__v4sf)(__m128)(B), \ 1083309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 1084309124Sdim (int)(R)); }) 1085296417Sdim 1086309124Sdim#define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \ 1087309124Sdim (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 1088309124Sdim (__v4sf)(__m128)(B), \ 1089309124Sdim (__v4sf)_mm_setzero_ps(), \ 1090309124Sdim (__mmask8)(U), (int)(R)); }) 1091296417Sdim 1092296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1093296417Sdim_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 1094309124Sdim return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, 1095296417Sdim (__v2df) __B, 1096296417Sdim (__v2df) __W, 1097296417Sdim (__mmask8) __U, 1098296417Sdim _MM_FROUND_CUR_DIRECTION); 1099296417Sdim} 1100296417Sdim 1101296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1102296417Sdim_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { 1103309124Sdim return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, 1104296417Sdim (__v2df) __B, 1105296417Sdim (__v2df) _mm_setzero_pd (), 1106296417Sdim (__mmask8) __U, 1107296417Sdim _MM_FROUND_CUR_DIRECTION); 1108296417Sdim} 1109296417Sdim 1110309124Sdim#define _mm_max_round_sd(A, B, R) __extension__ ({ \ 1111309124Sdim (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 1112309124Sdim (__v2df)(__m128d)(B), \ 1113309124Sdim (__v2df)_mm_setzero_pd(), \ 1114309124Sdim (__mmask8)-1, (int)(R)); }) 1115296417Sdim 1116309124Sdim#define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \ 1117309124Sdim (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), 
\ 1118309124Sdim (__v2df)(__m128d)(B), \ 1119309124Sdim (__v2df)(__m128d)(W), \ 1120309124Sdim (__mmask8)(U), (int)(R)); }) 1121296417Sdim 1122309124Sdim#define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \ 1123309124Sdim (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 1124309124Sdim (__v2df)(__m128d)(B), \ 1125309124Sdim (__v2df)_mm_setzero_pd(), \ 1126309124Sdim (__mmask8)(U), (int)(R)); }) 1127296417Sdim 1128277325Sdimstatic __inline __m512i 1129288943Sdim__DEFAULT_FN_ATTRS 1130277325Sdim_mm512_max_epi32(__m512i __A, __m512i __B) 1131277325Sdim{ 1132277325Sdim return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 1133277325Sdim (__v16si) __B, 1134277325Sdim (__v16si) 1135277325Sdim _mm512_setzero_si512 (), 1136277325Sdim (__mmask16) -1); 1137277325Sdim} 1138277325Sdim 1139309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1140309124Sdim_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1141309124Sdim{ 1142309124Sdim return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 1143309124Sdim (__v16si) __B, 1144309124Sdim (__v16si) __W, __M); 1145309124Sdim} 1146309124Sdim 1147309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1148309124Sdim_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 1149309124Sdim{ 1150309124Sdim return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 1151309124Sdim (__v16si) __B, 1152309124Sdim (__v16si) 1153309124Sdim _mm512_setzero_si512 (), 1154309124Sdim __M); 1155309124Sdim} 1156309124Sdim 1157288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1158277325Sdim_mm512_max_epu32(__m512i __A, __m512i __B) 1159277325Sdim{ 1160277325Sdim return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, 1161277325Sdim (__v16si) __B, 1162277325Sdim (__v16si) 1163277325Sdim _mm512_setzero_si512 (), 1164277325Sdim (__mmask16) -1); 1165277325Sdim} 1166277325Sdim 1167309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1168309124Sdim_mm512_mask_max_epu32 
(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1169309124Sdim{ 1170309124Sdim return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, 1171309124Sdim (__v16si) __B, 1172309124Sdim (__v16si) __W, __M); 1173309124Sdim} 1174309124Sdim 1175309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1176309124Sdim_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) 1177309124Sdim{ 1178309124Sdim return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, 1179309124Sdim (__v16si) __B, 1180309124Sdim (__v16si) 1181309124Sdim _mm512_setzero_si512 (), 1182309124Sdim __M); 1183309124Sdim} 1184309124Sdim 1185288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1186277325Sdim_mm512_max_epi64(__m512i __A, __m512i __B) 1187277325Sdim{ 1188277325Sdim return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 1189277325Sdim (__v8di) __B, 1190277325Sdim (__v8di) 1191277325Sdim _mm512_setzero_si512 (), 1192277325Sdim (__mmask8) -1); 1193277325Sdim} 1194277325Sdim 1195309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1196309124Sdim_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 1197309124Sdim{ 1198309124Sdim return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 1199309124Sdim (__v8di) __B, 1200309124Sdim (__v8di) __W, __M); 1201309124Sdim} 1202309124Sdim 1203309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1204309124Sdim_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) 1205309124Sdim{ 1206309124Sdim return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 1207309124Sdim (__v8di) __B, 1208309124Sdim (__v8di) 1209309124Sdim _mm512_setzero_si512 (), 1210309124Sdim __M); 1211309124Sdim} 1212309124Sdim 1213288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1214277325Sdim_mm512_max_epu64(__m512i __A, __m512i __B) 1215277325Sdim{ 1216277325Sdim return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 1217277325Sdim (__v8di) __B, 1218277325Sdim (__v8di) 1219277325Sdim _mm512_setzero_si512 (), 
1220277325Sdim (__mmask8) -1); 1221277325Sdim} 1222277325Sdim 1223309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1224309124Sdim_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 1225309124Sdim{ 1226309124Sdim return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 1227309124Sdim (__v8di) __B, 1228309124Sdim (__v8di) __W, __M); 1229309124Sdim} 1230309124Sdim 1231309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1232309124Sdim_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) 1233309124Sdim{ 1234309124Sdim return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 1235309124Sdim (__v8di) __B, 1236309124Sdim (__v8di) 1237309124Sdim _mm512_setzero_si512 (), 1238309124Sdim __M); 1239309124Sdim} 1240309124Sdim 1241309124Sdim#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \ 1242309124Sdim (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \ 1243309124Sdim (__v8df)(__m512d)(B), \ 1244309124Sdim (__v8df)(__m512d)(W), (__mmask8)(U), \ 1245309124Sdim (int)(R)); }) 1246309124Sdim 1247309124Sdim#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \ 1248309124Sdim (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \ 1249309124Sdim (__v8df)(__m512d)(B), \ 1250309124Sdim (__v8df)_mm512_setzero_pd(), \ 1251309124Sdim (__mmask8)(U), (int)(R)); }) 1252309124Sdim 1253309124Sdim#define _mm512_min_round_pd(A, B, R) __extension__ ({ \ 1254309124Sdim (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \ 1255309124Sdim (__v8df)(__m512d)(B), \ 1256309124Sdim (__v8df)_mm512_undefined_pd(), \ 1257309124Sdim (__mmask8)-1, (int)(R)); }) 1258309124Sdim 1259288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1260277325Sdim_mm512_min_pd(__m512d __A, __m512d __B) 1261277325Sdim{ 1262277325Sdim return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 1263277325Sdim (__v8df) __B, 1264277325Sdim (__v8df) 1265277325Sdim _mm512_setzero_pd (), 1266277325Sdim (__mmask8) -1, 
1267277325Sdim _MM_FROUND_CUR_DIRECTION); 1268277325Sdim} 1269277325Sdim 1270309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1271309124Sdim_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 1272309124Sdim{ 1273309124Sdim return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 1274309124Sdim (__v8df) __B, 1275309124Sdim (__v8df) __W, 1276309124Sdim (__mmask8) __U, 1277309124Sdim _MM_FROUND_CUR_DIRECTION); 1278309124Sdim} 1279309124Sdim 1280309124Sdim#define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \ 1281309124Sdim (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \ 1282309124Sdim (__v16sf)(__m512)(B), \ 1283309124Sdim (__v16sf)(__m512)(W), (__mmask16)(U), \ 1284309124Sdim (int)(R)); }) 1285309124Sdim 1286309124Sdim#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \ 1287309124Sdim (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \ 1288309124Sdim (__v16sf)(__m512)(B), \ 1289309124Sdim (__v16sf)_mm512_setzero_ps(), \ 1290309124Sdim (__mmask16)(U), (int)(R)); }) 1291309124Sdim 1292309124Sdim#define _mm512_min_round_ps(A, B, R) __extension__ ({ \ 1293309124Sdim (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \ 1294309124Sdim (__v16sf)(__m512)(B), \ 1295309124Sdim (__v16sf)_mm512_undefined_ps(), \ 1296309124Sdim (__mmask16)-1, (int)(R)); }) 1297309124Sdim 1298309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1299309124Sdim_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) 1300309124Sdim{ 1301309124Sdim return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 1302309124Sdim (__v8df) __B, 1303309124Sdim (__v8df) 1304309124Sdim _mm512_setzero_pd (), 1305309124Sdim (__mmask8) __U, 1306309124Sdim _MM_FROUND_CUR_DIRECTION); 1307309124Sdim} 1308309124Sdim 1309288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1310277325Sdim_mm512_min_ps(__m512 __A, __m512 __B) 1311277325Sdim{ 1312277325Sdim return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 1313277325Sdim 
(__v16sf) __B, 1314277325Sdim (__v16sf) 1315277325Sdim _mm512_setzero_ps (), 1316277325Sdim (__mmask16) -1, 1317277325Sdim _MM_FROUND_CUR_DIRECTION); 1318277325Sdim} 1319277325Sdim 1320309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1321309124Sdim_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 1322309124Sdim{ 1323309124Sdim return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 1324309124Sdim (__v16sf) __B, 1325309124Sdim (__v16sf) __W, 1326309124Sdim (__mmask16) __U, 1327309124Sdim _MM_FROUND_CUR_DIRECTION); 1328309124Sdim} 1329309124Sdim 1330309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1331309124Sdim_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) 1332309124Sdim{ 1333309124Sdim return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 1334309124Sdim (__v16sf) __B, 1335309124Sdim (__v16sf) 1336309124Sdim _mm512_setzero_ps (), 1337309124Sdim (__mmask16) __U, 1338309124Sdim _MM_FROUND_CUR_DIRECTION); 1339309124Sdim} 1340309124Sdim 1341296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1342296417Sdim_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 1343309124Sdim return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, 1344296417Sdim (__v4sf) __B, 1345296417Sdim (__v4sf) __W, 1346296417Sdim (__mmask8) __U, 1347296417Sdim _MM_FROUND_CUR_DIRECTION); 1348296417Sdim} 1349296417Sdim 1350296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1351296417Sdim_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { 1352309124Sdim return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, 1353296417Sdim (__v4sf) __B, 1354296417Sdim (__v4sf) _mm_setzero_ps (), 1355296417Sdim (__mmask8) __U, 1356296417Sdim _MM_FROUND_CUR_DIRECTION); 1357296417Sdim} 1358296417Sdim 1359309124Sdim#define _mm_min_round_ss(A, B, R) __extension__ ({ \ 1360309124Sdim (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1361309124Sdim (__v4sf)(__m128)(B), \ 1362309124Sdim (__v4sf)_mm_setzero_ps(), \ 
1363309124Sdim (__mmask8)-1, (int)(R)); }) 1364296417Sdim 1365309124Sdim#define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \ 1366309124Sdim (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1367309124Sdim (__v4sf)(__m128)(B), \ 1368309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 1369309124Sdim (int)(R)); }) 1370296417Sdim 1371309124Sdim#define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \ 1372309124Sdim (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1373309124Sdim (__v4sf)(__m128)(B), \ 1374309124Sdim (__v4sf)_mm_setzero_ps(), \ 1375309124Sdim (__mmask8)(U), (int)(R)); }) 1376296417Sdim 1377296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1378296417Sdim_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 1379309124Sdim return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, 1380296417Sdim (__v2df) __B, 1381296417Sdim (__v2df) __W, 1382296417Sdim (__mmask8) __U, 1383296417Sdim _MM_FROUND_CUR_DIRECTION); 1384296417Sdim} 1385296417Sdim 1386296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1387296417Sdim_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) { 1388309124Sdim return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, 1389296417Sdim (__v2df) __B, 1390296417Sdim (__v2df) _mm_setzero_pd (), 1391296417Sdim (__mmask8) __U, 1392296417Sdim _MM_FROUND_CUR_DIRECTION); 1393296417Sdim} 1394296417Sdim 1395309124Sdim#define _mm_min_round_sd(A, B, R) __extension__ ({ \ 1396309124Sdim (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1397309124Sdim (__v2df)(__m128d)(B), \ 1398309124Sdim (__v2df)_mm_setzero_pd(), \ 1399309124Sdim (__mmask8)-1, (int)(R)); }) 1400296417Sdim 1401309124Sdim#define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \ 1402309124Sdim (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1403309124Sdim (__v2df)(__m128d)(B), \ 1404309124Sdim (__v2df)(__m128d)(W), \ 1405309124Sdim (__mmask8)(U), (int)(R)); }) 1406296417Sdim 
1407309124Sdim#define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \ 1408309124Sdim (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1409309124Sdim (__v2df)(__m128d)(B), \ 1410309124Sdim (__v2df)_mm_setzero_pd(), \ 1411309124Sdim (__mmask8)(U), (int)(R)); }) 1412296417Sdim 1413277325Sdimstatic __inline __m512i 1414288943Sdim__DEFAULT_FN_ATTRS 1415277325Sdim_mm512_min_epi32(__m512i __A, __m512i __B) 1416277325Sdim{ 1417277325Sdim return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 1418277325Sdim (__v16si) __B, 1419277325Sdim (__v16si) 1420277325Sdim _mm512_setzero_si512 (), 1421277325Sdim (__mmask16) -1); 1422277325Sdim} 1423277325Sdim 1424309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1425309124Sdim_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1426309124Sdim{ 1427309124Sdim return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 1428309124Sdim (__v16si) __B, 1429309124Sdim (__v16si) __W, __M); 1430309124Sdim} 1431309124Sdim 1432309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1433309124Sdim_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 1434309124Sdim{ 1435309124Sdim return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 1436309124Sdim (__v16si) __B, 1437309124Sdim (__v16si) 1438309124Sdim _mm512_setzero_si512 (), 1439309124Sdim __M); 1440309124Sdim} 1441309124Sdim 1442288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1443277325Sdim_mm512_min_epu32(__m512i __A, __m512i __B) 1444277325Sdim{ 1445277325Sdim return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, 1446277325Sdim (__v16si) __B, 1447277325Sdim (__v16si) 1448277325Sdim _mm512_setzero_si512 (), 1449277325Sdim (__mmask16) -1); 1450277325Sdim} 1451277325Sdim 1452309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1453309124Sdim_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1454309124Sdim{ 1455309124Sdim return (__m512i) __builtin_ia32_pminud512_mask 
((__v16si) __A, 1456309124Sdim (__v16si) __B, 1457309124Sdim (__v16si) __W, __M); 1458309124Sdim} 1459309124Sdim 1460309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1461309124Sdim_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) 1462309124Sdim{ 1463309124Sdim return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, 1464309124Sdim (__v16si) __B, 1465309124Sdim (__v16si) 1466309124Sdim _mm512_setzero_si512 (), 1467309124Sdim __M); 1468309124Sdim} 1469309124Sdim 1470288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1471277325Sdim_mm512_min_epi64(__m512i __A, __m512i __B) 1472277325Sdim{ 1473277325Sdim return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 1474277325Sdim (__v8di) __B, 1475277325Sdim (__v8di) 1476277325Sdim _mm512_setzero_si512 (), 1477277325Sdim (__mmask8) -1); 1478277325Sdim} 1479277325Sdim 1480309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1481309124Sdim_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 1482309124Sdim{ 1483309124Sdim return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 1484309124Sdim (__v8di) __B, 1485309124Sdim (__v8di) __W, __M); 1486309124Sdim} 1487309124Sdim 1488309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1489309124Sdim_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) 1490309124Sdim{ 1491309124Sdim return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 1492309124Sdim (__v8di) __B, 1493309124Sdim (__v8di) 1494309124Sdim _mm512_setzero_si512 (), 1495309124Sdim __M); 1496309124Sdim} 1497309124Sdim 1498288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1499277325Sdim_mm512_min_epu64(__m512i __A, __m512i __B) 1500277325Sdim{ 1501277325Sdim return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 1502277325Sdim (__v8di) __B, 1503277325Sdim (__v8di) 1504277325Sdim _mm512_setzero_si512 (), 1505277325Sdim (__mmask8) -1); 1506277325Sdim} 1507277325Sdim 1508309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 
1509309124Sdim_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 1510309124Sdim{ 1511309124Sdim return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 1512309124Sdim (__v8di) __B, 1513309124Sdim (__v8di) __W, __M); 1514309124Sdim} 1515309124Sdim 1516309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1517309124Sdim_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) 1518309124Sdim{ 1519309124Sdim return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 1520309124Sdim (__v8di) __B, 1521309124Sdim (__v8di) 1522309124Sdim _mm512_setzero_si512 (), 1523309124Sdim __M); 1524309124Sdim} 1525309124Sdim 1526288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1527277325Sdim_mm512_mul_epi32(__m512i __X, __m512i __Y) 1528277325Sdim{ 1529314564Sdim return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y); 1530277325Sdim} 1531277325Sdim 1532288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1533314564Sdim_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 1534288943Sdim{ 1535314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1536314564Sdim (__v8di)_mm512_mul_epi32(__X, __Y), 1537314564Sdim (__v8di)__W); 1538288943Sdim} 1539288943Sdim 1540288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1541314564Sdim_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y) 1542288943Sdim{ 1543314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1544314564Sdim (__v8di)_mm512_mul_epi32(__X, __Y), 1545314564Sdim (__v8di)_mm512_setzero_si512 ()); 1546288943Sdim} 1547288943Sdim 1548288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1549277325Sdim_mm512_mul_epu32(__m512i __X, __m512i __Y) 1550277325Sdim{ 1551314564Sdim return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y); 1552277325Sdim} 1553277325Sdim 1554288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1555314564Sdim_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 
1556288943Sdim{ 1557314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1558314564Sdim (__v8di)_mm512_mul_epu32(__X, __Y), 1559314564Sdim (__v8di)__W); 1560288943Sdim} 1561288943Sdim 1562288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1563314564Sdim_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y) 1564288943Sdim{ 1565314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1566314564Sdim (__v8di)_mm512_mul_epu32(__X, __Y), 1567314564Sdim (__v8di)_mm512_setzero_si512 ()); 1568288943Sdim} 1569288943Sdim 1570288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1571288943Sdim_mm512_mullo_epi32 (__m512i __A, __m512i __B) 1572288943Sdim{ 1573309124Sdim return (__m512i) ((__v16su) __A * (__v16su) __B); 1574288943Sdim} 1575288943Sdim 1576288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1577314564Sdim_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) 1578288943Sdim{ 1579314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1580314564Sdim (__v16si)_mm512_mullo_epi32(__A, __B), 1581314564Sdim (__v16si)_mm512_setzero_si512()); 1582288943Sdim} 1583288943Sdim 1584288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1585314564Sdim_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1586288943Sdim{ 1587314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1588314564Sdim (__v16si)_mm512_mullo_epi32(__A, __B), 1589314564Sdim (__v16si)__W); 1590288943Sdim} 1591288943Sdim 1592309124Sdim#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \ 1593309124Sdim (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ 1594309124Sdim (__v8df)(__m512d)(W), (__mmask8)(U), \ 1595309124Sdim (int)(R)); }) 1596309124Sdim 1597309124Sdim#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \ 1598309124Sdim (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ 1599309124Sdim (__v8df)_mm512_setzero_pd(), \ 1600309124Sdim (__mmask8)(U), (int)(R)); }) 
1601309124Sdim 1602309124Sdim#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \ 1603309124Sdim (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ 1604309124Sdim (__v8df)_mm512_undefined_pd(), \ 1605309124Sdim (__mmask8)-1, (int)(R)); }) 1606309124Sdim 1607288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1608296417Sdim_mm512_sqrt_pd(__m512d __a) 1609277325Sdim{ 1610296417Sdim return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a, 1611277325Sdim (__v8df) _mm512_setzero_pd (), 1612277325Sdim (__mmask8) -1, 1613277325Sdim _MM_FROUND_CUR_DIRECTION); 1614277325Sdim} 1615277325Sdim 1616309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1617309124Sdim_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) 1618309124Sdim{ 1619309124Sdim return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 1620309124Sdim (__v8df) __W, 1621309124Sdim (__mmask8) __U, 1622309124Sdim _MM_FROUND_CUR_DIRECTION); 1623309124Sdim} 1624309124Sdim 1625309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1626309124Sdim_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) 1627309124Sdim{ 1628309124Sdim return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 1629309124Sdim (__v8df) 1630309124Sdim _mm512_setzero_pd (), 1631309124Sdim (__mmask8) __U, 1632309124Sdim _MM_FROUND_CUR_DIRECTION); 1633309124Sdim} 1634309124Sdim 1635309124Sdim#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \ 1636309124Sdim (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ 1637309124Sdim (__v16sf)(__m512)(W), (__mmask16)(U), \ 1638309124Sdim (int)(R)); }) 1639309124Sdim 1640309124Sdim#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \ 1641309124Sdim (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ 1642309124Sdim (__v16sf)_mm512_setzero_ps(), \ 1643309124Sdim (__mmask16)(U), (int)(R)); }) 1644309124Sdim 1645309124Sdim#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \ 1646309124Sdim 
(__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ 1647309124Sdim (__v16sf)_mm512_undefined_ps(), \ 1648309124Sdim (__mmask16)-1, (int)(R)); }) 1649309124Sdim 1650288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1651296417Sdim_mm512_sqrt_ps(__m512 __a) 1652277325Sdim{ 1653296417Sdim return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a, 1654277325Sdim (__v16sf) _mm512_setzero_ps (), 1655277325Sdim (__mmask16) -1, 1656277325Sdim _MM_FROUND_CUR_DIRECTION); 1657277325Sdim} 1658277325Sdim 1659309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1660309124Sdim_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) 1661309124Sdim{ 1662309124Sdim return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A, 1663309124Sdim (__v16sf) __W, 1664309124Sdim (__mmask16) __U, 1665309124Sdim _MM_FROUND_CUR_DIRECTION); 1666309124Sdim} 1667309124Sdim 1668309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1669309124Sdim_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A) 1670309124Sdim{ 1671309124Sdim return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A, 1672309124Sdim (__v16sf) _mm512_setzero_ps (), 1673309124Sdim (__mmask16) __U, 1674309124Sdim _MM_FROUND_CUR_DIRECTION); 1675309124Sdim} 1676309124Sdim 1677288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1678277325Sdim_mm512_rsqrt14_pd(__m512d __A) 1679277325Sdim{ 1680277325Sdim return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1681277325Sdim (__v8df) 1682277325Sdim _mm512_setzero_pd (), 1683277325Sdim (__mmask8) -1);} 1684277325Sdim 1685309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1686309124Sdim_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A) 1687309124Sdim{ 1688309124Sdim return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1689309124Sdim (__v8df) __W, 1690309124Sdim (__mmask8) __U); 1691309124Sdim} 1692309124Sdim 1693309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1694309124Sdim_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A) 
1695309124Sdim{ 1696309124Sdim return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1697309124Sdim (__v8df) 1698309124Sdim _mm512_setzero_pd (), 1699309124Sdim (__mmask8) __U); 1700309124Sdim} 1701309124Sdim 1702288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1703277325Sdim_mm512_rsqrt14_ps(__m512 __A) 1704277325Sdim{ 1705277325Sdim return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1706277325Sdim (__v16sf) 1707277325Sdim _mm512_setzero_ps (), 1708277325Sdim (__mmask16) -1); 1709277325Sdim} 1710277325Sdim 1711309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1712309124Sdim_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A) 1713309124Sdim{ 1714309124Sdim return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1715309124Sdim (__v16sf) __W, 1716309124Sdim (__mmask16) __U); 1717309124Sdim} 1718309124Sdim 1719309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1720309124Sdim_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A) 1721309124Sdim{ 1722309124Sdim return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1723309124Sdim (__v16sf) 1724309124Sdim _mm512_setzero_ps (), 1725309124Sdim (__mmask16) __U); 1726309124Sdim} 1727309124Sdim 1728288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1729277325Sdim_mm_rsqrt14_ss(__m128 __A, __m128 __B) 1730277325Sdim{ 1731309124Sdim return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, 1732277325Sdim (__v4sf) __B, 1733277325Sdim (__v4sf) 1734277325Sdim _mm_setzero_ps (), 1735277325Sdim (__mmask8) -1); 1736277325Sdim} 1737277325Sdim 1738309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1739309124Sdim_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1740309124Sdim{ 1741309124Sdim return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, 1742309124Sdim (__v4sf) __B, 1743309124Sdim (__v4sf) __W, 1744309124Sdim (__mmask8) __U); 1745309124Sdim} 1746309124Sdim 1747309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 
1748309124Sdim_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) 1749309124Sdim{ 1750309124Sdim return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, 1751309124Sdim (__v4sf) __B, 1752309124Sdim (__v4sf) _mm_setzero_ps (), 1753309124Sdim (__mmask8) __U); 1754309124Sdim} 1755309124Sdim 1756288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1757277325Sdim_mm_rsqrt14_sd(__m128d __A, __m128d __B) 1758277325Sdim{ 1759309124Sdim return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A, 1760277325Sdim (__v2df) __B, 1761277325Sdim (__v2df) 1762277325Sdim _mm_setzero_pd (), 1763277325Sdim (__mmask8) -1); 1764277325Sdim} 1765277325Sdim 1766309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1767309124Sdim_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1768309124Sdim{ 1769309124Sdim return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, 1770309124Sdim (__v2df) __B, 1771309124Sdim (__v2df) __W, 1772309124Sdim (__mmask8) __U); 1773309124Sdim} 1774309124Sdim 1775309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1776309124Sdim_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) 1777309124Sdim{ 1778309124Sdim return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, 1779309124Sdim (__v2df) __B, 1780309124Sdim (__v2df) _mm_setzero_pd (), 1781309124Sdim (__mmask8) __U); 1782309124Sdim} 1783309124Sdim 1784288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1785277325Sdim_mm512_rcp14_pd(__m512d __A) 1786277325Sdim{ 1787277325Sdim return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1788277325Sdim (__v8df) 1789277325Sdim _mm512_setzero_pd (), 1790277325Sdim (__mmask8) -1); 1791277325Sdim} 1792277325Sdim 1793309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1794309124Sdim_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A) 1795309124Sdim{ 1796309124Sdim return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1797309124Sdim (__v8df) __W, 1798309124Sdim (__mmask8) __U); 
1799309124Sdim} 1800309124Sdim 1801309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1802309124Sdim_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A) 1803309124Sdim{ 1804309124Sdim return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1805309124Sdim (__v8df) 1806309124Sdim _mm512_setzero_pd (), 1807309124Sdim (__mmask8) __U); 1808309124Sdim} 1809309124Sdim 1810288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1811277325Sdim_mm512_rcp14_ps(__m512 __A) 1812277325Sdim{ 1813277325Sdim return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1814277325Sdim (__v16sf) 1815277325Sdim _mm512_setzero_ps (), 1816277325Sdim (__mmask16) -1); 1817277325Sdim} 1818309124Sdim 1819309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1820309124Sdim_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A) 1821309124Sdim{ 1822309124Sdim return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1823309124Sdim (__v16sf) __W, 1824309124Sdim (__mmask16) __U); 1825309124Sdim} 1826309124Sdim 1827309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1828309124Sdim_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A) 1829309124Sdim{ 1830309124Sdim return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1831309124Sdim (__v16sf) 1832309124Sdim _mm512_setzero_ps (), 1833309124Sdim (__mmask16) __U); 1834309124Sdim} 1835309124Sdim 1836288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1837277325Sdim_mm_rcp14_ss(__m128 __A, __m128 __B) 1838277325Sdim{ 1839309124Sdim return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, 1840277325Sdim (__v4sf) __B, 1841277325Sdim (__v4sf) 1842277325Sdim _mm_setzero_ps (), 1843277325Sdim (__mmask8) -1); 1844277325Sdim} 1845277325Sdim 1846309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1847309124Sdim_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1848309124Sdim{ 1849309124Sdim return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, 1850309124Sdim (__v4sf) __B, 1851309124Sdim (__v4sf) __W, 
1852309124Sdim (__mmask8) __U); 1853309124Sdim} 1854309124Sdim 1855309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1856309124Sdim_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B) 1857309124Sdim{ 1858309124Sdim return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, 1859309124Sdim (__v4sf) __B, 1860309124Sdim (__v4sf) _mm_setzero_ps (), 1861309124Sdim (__mmask8) __U); 1862309124Sdim} 1863309124Sdim 1864288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1865277325Sdim_mm_rcp14_sd(__m128d __A, __m128d __B) 1866277325Sdim{ 1867309124Sdim return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A, 1868277325Sdim (__v2df) __B, 1869277325Sdim (__v2df) 1870277325Sdim _mm_setzero_pd (), 1871277325Sdim (__mmask8) -1); 1872277325Sdim} 1873277325Sdim 1874309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1875309124Sdim_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1876309124Sdim{ 1877309124Sdim return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, 1878309124Sdim (__v2df) __B, 1879309124Sdim (__v2df) __W, 1880309124Sdim (__mmask8) __U); 1881309124Sdim} 1882309124Sdim 1883309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1884309124Sdim_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B) 1885309124Sdim{ 1886309124Sdim return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, 1887309124Sdim (__v2df) __B, 1888309124Sdim (__v2df) _mm_setzero_pd (), 1889309124Sdim (__mmask8) __U); 1890309124Sdim} 1891309124Sdim 1892288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 1893277325Sdim_mm512_floor_ps(__m512 __A) 1894277325Sdim{ 1895277325Sdim return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 1896277325Sdim _MM_FROUND_FLOOR, 1897277325Sdim (__v16sf) __A, -1, 1898277325Sdim _MM_FROUND_CUR_DIRECTION); 1899277325Sdim} 1900277325Sdim 1901309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1902309124Sdim_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A) 1903309124Sdim{ 1904309124Sdim return (__m512) 
__builtin_ia32_rndscaleps_mask ((__v16sf) __A, 1905309124Sdim _MM_FROUND_FLOOR, 1906309124Sdim (__v16sf) __W, __U, 1907309124Sdim _MM_FROUND_CUR_DIRECTION); 1908309124Sdim} 1909309124Sdim 1910288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 1911277325Sdim_mm512_floor_pd(__m512d __A) 1912277325Sdim{ 1913277325Sdim return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 1914277325Sdim _MM_FROUND_FLOOR, 1915277325Sdim (__v8df) __A, -1, 1916277325Sdim _MM_FROUND_CUR_DIRECTION); 1917277325Sdim} 1918277325Sdim 1919309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1920309124Sdim_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A) 1921309124Sdim{ 1922309124Sdim return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 1923309124Sdim _MM_FROUND_FLOOR, 1924309124Sdim (__v8df) __W, __U, 1925309124Sdim _MM_FROUND_CUR_DIRECTION); 1926309124Sdim} 1927309124Sdim 1928309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1929309124Sdim_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A) 1930309124Sdim{ 1931309124Sdim return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 1932309124Sdim _MM_FROUND_CEIL, 1933309124Sdim (__v16sf) __W, __U, 1934309124Sdim _MM_FROUND_CUR_DIRECTION); 1935309124Sdim} 1936309124Sdim 1937288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 1938277325Sdim_mm512_ceil_ps(__m512 __A) 1939277325Sdim{ 1940277325Sdim return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 1941277325Sdim _MM_FROUND_CEIL, 1942277325Sdim (__v16sf) __A, -1, 1943277325Sdim _MM_FROUND_CUR_DIRECTION); 1944277325Sdim} 1945277325Sdim 1946288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 1947277325Sdim_mm512_ceil_pd(__m512d __A) 1948277325Sdim{ 1949277325Sdim return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 1950277325Sdim _MM_FROUND_CEIL, 1951277325Sdim (__v8df) __A, -1, 1952277325Sdim _MM_FROUND_CUR_DIRECTION); 1953277325Sdim} 1954277325Sdim 1955309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 
1956309124Sdim_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) 1957309124Sdim{ 1958309124Sdim return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 1959309124Sdim _MM_FROUND_CEIL, 1960309124Sdim (__v8df) __W, __U, 1961309124Sdim _MM_FROUND_CUR_DIRECTION); 1962309124Sdim} 1963309124Sdim 1964288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1965277325Sdim_mm512_abs_epi64(__m512i __A) 1966277325Sdim{ 1967277325Sdim return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 1968277325Sdim (__v8di) 1969277325Sdim _mm512_setzero_si512 (), 1970277325Sdim (__mmask8) -1); 1971277325Sdim} 1972277325Sdim 1973309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1974309124Sdim_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 1975309124Sdim{ 1976309124Sdim return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 1977309124Sdim (__v8di) __W, 1978309124Sdim (__mmask8) __U); 1979309124Sdim} 1980309124Sdim 1981309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1982309124Sdim_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) 1983309124Sdim{ 1984309124Sdim return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 1985309124Sdim (__v8di) 1986309124Sdim _mm512_setzero_si512 (), 1987309124Sdim (__mmask8) __U); 1988309124Sdim} 1989309124Sdim 1990288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1991277325Sdim_mm512_abs_epi32(__m512i __A) 1992277325Sdim{ 1993277325Sdim return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 1994277325Sdim (__v16si) 1995277325Sdim _mm512_setzero_si512 (), 1996277325Sdim (__mmask16) -1); 1997277325Sdim} 1998277325Sdim 1999309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 2000309124Sdim_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 2001309124Sdim{ 2002309124Sdim return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 2003309124Sdim (__v16si) __W, 2004309124Sdim (__mmask16) __U); 2005309124Sdim} 2006309124Sdim 2007309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 
2008309124Sdim_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) 2009309124Sdim{ 2010309124Sdim return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 2011309124Sdim (__v16si) 2012309124Sdim _mm512_setzero_si512 (), 2013309124Sdim (__mmask16) __U); 2014309124Sdim} 2015309124Sdim 2016296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 2017296417Sdim_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 2018309124Sdim return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A, 2019296417Sdim (__v4sf) __B, 2020296417Sdim (__v4sf) __W, 2021296417Sdim (__mmask8) __U, 2022296417Sdim _MM_FROUND_CUR_DIRECTION); 2023296417Sdim} 2024296417Sdim 2025296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 2026296417Sdim_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) { 2027309124Sdim return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A, 2028296417Sdim (__v4sf) __B, 2029296417Sdim (__v4sf) _mm_setzero_ps (), 2030296417Sdim (__mmask8) __U, 2031296417Sdim _MM_FROUND_CUR_DIRECTION); 2032296417Sdim} 2033296417Sdim 2034309124Sdim#define _mm_add_round_ss(A, B, R) __extension__ ({ \ 2035309124Sdim (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 2036309124Sdim (__v4sf)(__m128)(B), \ 2037309124Sdim (__v4sf)_mm_setzero_ps(), \ 2038309124Sdim (__mmask8)-1, (int)(R)); }) 2039296417Sdim 2040309124Sdim#define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \ 2041309124Sdim (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 2042309124Sdim (__v4sf)(__m128)(B), \ 2043309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 2044309124Sdim (int)(R)); }) 2045296417Sdim 2046309124Sdim#define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \ 2047309124Sdim (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 2048309124Sdim (__v4sf)(__m128)(B), \ 2049309124Sdim (__v4sf)_mm_setzero_ps(), \ 2050309124Sdim (__mmask8)(U), (int)(R)); }) 2051296417Sdim 2052296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 
2053296417Sdim_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 2054309124Sdim return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A, 2055296417Sdim (__v2df) __B, 2056296417Sdim (__v2df) __W, 2057296417Sdim (__mmask8) __U, 2058296417Sdim _MM_FROUND_CUR_DIRECTION); 2059296417Sdim} 2060296417Sdim 2061296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 2062296417Sdim_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) { 2063309124Sdim return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A, 2064296417Sdim (__v2df) __B, 2065296417Sdim (__v2df) _mm_setzero_pd (), 2066296417Sdim (__mmask8) __U, 2067296417Sdim _MM_FROUND_CUR_DIRECTION); 2068296417Sdim} 2069309124Sdim#define _mm_add_round_sd(A, B, R) __extension__ ({ \ 2070309124Sdim (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 2071309124Sdim (__v2df)(__m128d)(B), \ 2072309124Sdim (__v2df)_mm_setzero_pd(), \ 2073309124Sdim (__mmask8)-1, (int)(R)); }) 2074296417Sdim 2075309124Sdim#define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \ 2076309124Sdim (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 2077309124Sdim (__v2df)(__m128d)(B), \ 2078309124Sdim (__v2df)(__m128d)(W), \ 2079309124Sdim (__mmask8)(U), (int)(R)); }) 2080296417Sdim 2081309124Sdim#define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \ 2082309124Sdim (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 2083309124Sdim (__v2df)(__m128d)(B), \ 2084309124Sdim (__v2df)_mm_setzero_pd(), \ 2085309124Sdim (__mmask8)(U), (int)(R)); }) 2086296417Sdim 2087296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2088296417Sdim_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2089314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2090314564Sdim (__v8df)_mm512_add_pd(__A, __B), 2091314564Sdim (__v8df)__W); 2092296417Sdim} 2093296417Sdim 2094296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 
2095296417Sdim_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2096314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2097314564Sdim (__v8df)_mm512_add_pd(__A, __B), 2098314564Sdim (__v8df)_mm512_setzero_pd()); 2099296417Sdim} 2100296417Sdim 2101296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2102296417Sdim_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2103314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2104314564Sdim (__v16sf)_mm512_add_ps(__A, __B), 2105314564Sdim (__v16sf)__W); 2106296417Sdim} 2107296417Sdim 2108296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2109296417Sdim_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2110314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2111314564Sdim (__v16sf)_mm512_add_ps(__A, __B), 2112314564Sdim (__v16sf)_mm512_setzero_ps()); 2113296417Sdim} 2114296417Sdim 2115309124Sdim#define _mm512_add_round_pd(A, B, R) __extension__ ({ \ 2116309124Sdim (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \ 2117309124Sdim (__v8df)(__m512d)(B), \ 2118309124Sdim (__v8df)_mm512_setzero_pd(), \ 2119309124Sdim (__mmask8)-1, (int)(R)); }) 2120296417Sdim 2121309124Sdim#define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \ 2122309124Sdim (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \ 2123309124Sdim (__v8df)(__m512d)(B), \ 2124309124Sdim (__v8df)(__m512d)(W), (__mmask8)(U), \ 2125309124Sdim (int)(R)); }) 2126296417Sdim 2127309124Sdim#define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \ 2128309124Sdim (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \ 2129309124Sdim (__v8df)(__m512d)(B), \ 2130309124Sdim (__v8df)_mm512_setzero_pd(), \ 2131309124Sdim (__mmask8)(U), (int)(R)); }) 2132296417Sdim 2133309124Sdim#define _mm512_add_round_ps(A, B, R) __extension__ ({ \ 2134309124Sdim (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \ 2135309124Sdim 
(__v16sf)(__m512)(B), \ 2136309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2137309124Sdim (__mmask16)-1, (int)(R)); }) 2138296417Sdim 2139309124Sdim#define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \ 2140309124Sdim (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \ 2141309124Sdim (__v16sf)(__m512)(B), \ 2142309124Sdim (__v16sf)(__m512)(W), (__mmask16)(U), \ 2143309124Sdim (int)(R)); }) 2144296417Sdim 2145309124Sdim#define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \ 2146309124Sdim (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \ 2147309124Sdim (__v16sf)(__m512)(B), \ 2148309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2149309124Sdim (__mmask16)(U), (int)(R)); }) 2150296417Sdim 2151296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 2152296417Sdim_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 2153309124Sdim return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A, 2154296417Sdim (__v4sf) __B, 2155296417Sdim (__v4sf) __W, 2156296417Sdim (__mmask8) __U, 2157296417Sdim _MM_FROUND_CUR_DIRECTION); 2158296417Sdim} 2159296417Sdim 2160296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 2161296417Sdim_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) { 2162309124Sdim return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A, 2163296417Sdim (__v4sf) __B, 2164296417Sdim (__v4sf) _mm_setzero_ps (), 2165296417Sdim (__mmask8) __U, 2166296417Sdim _MM_FROUND_CUR_DIRECTION); 2167296417Sdim} 2168309124Sdim#define _mm_sub_round_ss(A, B, R) __extension__ ({ \ 2169309124Sdim (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2170309124Sdim (__v4sf)(__m128)(B), \ 2171309124Sdim (__v4sf)_mm_setzero_ps(), \ 2172309124Sdim (__mmask8)-1, (int)(R)); }) 2173296417Sdim 2174309124Sdim#define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \ 2175309124Sdim (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2176309124Sdim (__v4sf)(__m128)(B), \ 2177309124Sdim (__v4sf)(__m128)(W), 
(__mmask8)(U), \ 2178309124Sdim (int)(R)); }) 2179296417Sdim 2180309124Sdim#define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \ 2181309124Sdim (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2182309124Sdim (__v4sf)(__m128)(B), \ 2183309124Sdim (__v4sf)_mm_setzero_ps(), \ 2184309124Sdim (__mmask8)(U), (int)(R)); }) 2185296417Sdim 2186296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 2187296417Sdim_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 2188309124Sdim return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A, 2189296417Sdim (__v2df) __B, 2190296417Sdim (__v2df) __W, 2191296417Sdim (__mmask8) __U, 2192296417Sdim _MM_FROUND_CUR_DIRECTION); 2193296417Sdim} 2194296417Sdim 2195296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 2196296417Sdim_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) { 2197309124Sdim return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A, 2198296417Sdim (__v2df) __B, 2199296417Sdim (__v2df) _mm_setzero_pd (), 2200296417Sdim (__mmask8) __U, 2201296417Sdim _MM_FROUND_CUR_DIRECTION); 2202296417Sdim} 2203296417Sdim 2204309124Sdim#define _mm_sub_round_sd(A, B, R) __extension__ ({ \ 2205309124Sdim (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2206309124Sdim (__v2df)(__m128d)(B), \ 2207309124Sdim (__v2df)_mm_setzero_pd(), \ 2208309124Sdim (__mmask8)-1, (int)(R)); }) 2209296417Sdim 2210309124Sdim#define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \ 2211309124Sdim (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2212309124Sdim (__v2df)(__m128d)(B), \ 2213309124Sdim (__v2df)(__m128d)(W), \ 2214309124Sdim (__mmask8)(U), (int)(R)); }) 2215296417Sdim 2216309124Sdim#define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \ 2217309124Sdim (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2218309124Sdim (__v2df)(__m128d)(B), \ 2219309124Sdim (__v2df)_mm_setzero_pd(), \ 2220309124Sdim (__mmask8)(U), (int)(R)); }) 
2221296417Sdim 2222296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2223296417Sdim_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2224314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2225314564Sdim (__v8df)_mm512_sub_pd(__A, __B), 2226314564Sdim (__v8df)__W); 2227296417Sdim} 2228296417Sdim 2229296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2230296417Sdim_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2231314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2232314564Sdim (__v8df)_mm512_sub_pd(__A, __B), 2233314564Sdim (__v8df)_mm512_setzero_pd()); 2234296417Sdim} 2235296417Sdim 2236296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2237296417Sdim_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2238314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2239314564Sdim (__v16sf)_mm512_sub_ps(__A, __B), 2240314564Sdim (__v16sf)__W); 2241296417Sdim} 2242296417Sdim 2243296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2244296417Sdim_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2245314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2246314564Sdim (__v16sf)_mm512_sub_ps(__A, __B), 2247314564Sdim (__v16sf)_mm512_setzero_ps()); 2248296417Sdim} 2249296417Sdim 2250309124Sdim#define _mm512_sub_round_pd(A, B, R) __extension__ ({ \ 2251309124Sdim (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ 2252309124Sdim (__v8df)(__m512d)(B), \ 2253309124Sdim (__v8df)_mm512_setzero_pd(), \ 2254309124Sdim (__mmask8)-1, (int)(R)); }) 2255296417Sdim 2256309124Sdim#define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \ 2257309124Sdim (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ 2258309124Sdim (__v8df)(__m512d)(B), \ 2259309124Sdim (__v8df)(__m512d)(W), (__mmask8)(U), \ 2260309124Sdim (int)(R)); }) 2261296417Sdim 2262309124Sdim#define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ 
\ 2263309124Sdim (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ 2264309124Sdim (__v8df)(__m512d)(B), \ 2265309124Sdim (__v8df)_mm512_setzero_pd(), \ 2266309124Sdim (__mmask8)(U), (int)(R)); }) 2267296417Sdim 2268309124Sdim#define _mm512_sub_round_ps(A, B, R) __extension__ ({ \ 2269309124Sdim (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ 2270309124Sdim (__v16sf)(__m512)(B), \ 2271309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2272309124Sdim (__mmask16)-1, (int)(R)); }) 2273296417Sdim 2274309124Sdim#define _mm512_mask_sub_round_ps(W, U, A, B, R) __extension__ ({ \ 2275309124Sdim (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ 2276309124Sdim (__v16sf)(__m512)(B), \ 2277309124Sdim (__v16sf)(__m512)(W), (__mmask16)(U), \ 2278309124Sdim (int)(R)); }); 2279296417Sdim 2280309124Sdim#define _mm512_maskz_sub_round_ps(U, A, B, R) __extension__ ({ \ 2281309124Sdim (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ 2282309124Sdim (__v16sf)(__m512)(B), \ 2283309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2284309124Sdim (__mmask16)(U), (int)(R)); }); 2285296417Sdim 2286296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 2287296417Sdim_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 2288309124Sdim return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A, 2289296417Sdim (__v4sf) __B, 2290296417Sdim (__v4sf) __W, 2291296417Sdim (__mmask8) __U, 2292296417Sdim _MM_FROUND_CUR_DIRECTION); 2293296417Sdim} 2294296417Sdim 2295296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 2296296417Sdim_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) { 2297309124Sdim return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A, 2298296417Sdim (__v4sf) __B, 2299296417Sdim (__v4sf) _mm_setzero_ps (), 2300296417Sdim (__mmask8) __U, 2301296417Sdim _MM_FROUND_CUR_DIRECTION); 2302296417Sdim} 2303309124Sdim#define _mm_mul_round_ss(A, B, R) __extension__ ({ \ 2304309124Sdim 
(__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2305309124Sdim (__v4sf)(__m128)(B), \ 2306309124Sdim (__v4sf)_mm_setzero_ps(), \ 2307309124Sdim (__mmask8)-1, (int)(R)); }) 2308296417Sdim 2309309124Sdim#define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \ 2310309124Sdim (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2311309124Sdim (__v4sf)(__m128)(B), \ 2312309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 2313309124Sdim (int)(R)); }) 2314296417Sdim 2315309124Sdim#define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \ 2316309124Sdim (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2317309124Sdim (__v4sf)(__m128)(B), \ 2318309124Sdim (__v4sf)_mm_setzero_ps(), \ 2319309124Sdim (__mmask8)(U), (int)(R)); }) 2320296417Sdim 2321296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 2322296417Sdim_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 2323309124Sdim return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A, 2324296417Sdim (__v2df) __B, 2325296417Sdim (__v2df) __W, 2326296417Sdim (__mmask8) __U, 2327296417Sdim _MM_FROUND_CUR_DIRECTION); 2328296417Sdim} 2329296417Sdim 2330296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 2331296417Sdim_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) { 2332309124Sdim return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A, 2333296417Sdim (__v2df) __B, 2334296417Sdim (__v2df) _mm_setzero_pd (), 2335296417Sdim (__mmask8) __U, 2336296417Sdim _MM_FROUND_CUR_DIRECTION); 2337296417Sdim} 2338296417Sdim 2339309124Sdim#define _mm_mul_round_sd(A, B, R) __extension__ ({ \ 2340309124Sdim (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2341309124Sdim (__v2df)(__m128d)(B), \ 2342309124Sdim (__v2df)_mm_setzero_pd(), \ 2343309124Sdim (__mmask8)-1, (int)(R)); }) 2344296417Sdim 2345309124Sdim#define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \ 2346309124Sdim (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), 
\ 2347309124Sdim (__v2df)(__m128d)(B), \ 2348309124Sdim (__v2df)(__m128d)(W), \ 2349309124Sdim (__mmask8)(U), (int)(R)); }) 2350296417Sdim 2351309124Sdim#define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \ 2352309124Sdim (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2353309124Sdim (__v2df)(__m128d)(B), \ 2354309124Sdim (__v2df)_mm_setzero_pd(), \ 2355309124Sdim (__mmask8)(U), (int)(R)); }) 2356296417Sdim 2357296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2358296417Sdim_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2359314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2360314564Sdim (__v8df)_mm512_mul_pd(__A, __B), 2361314564Sdim (__v8df)__W); 2362296417Sdim} 2363296417Sdim 2364296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2365296417Sdim_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2366314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2367314564Sdim (__v8df)_mm512_mul_pd(__A, __B), 2368314564Sdim (__v8df)_mm512_setzero_pd()); 2369296417Sdim} 2370296417Sdim 2371296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2372296417Sdim_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2373314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2374314564Sdim (__v16sf)_mm512_mul_ps(__A, __B), 2375314564Sdim (__v16sf)__W); 2376296417Sdim} 2377296417Sdim 2378296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2379296417Sdim_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2380314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2381314564Sdim (__v16sf)_mm512_mul_ps(__A, __B), 2382314564Sdim (__v16sf)_mm512_setzero_ps()); 2383296417Sdim} 2384296417Sdim 2385309124Sdim#define _mm512_mul_round_pd(A, B, R) __extension__ ({ \ 2386309124Sdim (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ 2387309124Sdim (__v8df)(__m512d)(B), \ 2388309124Sdim (__v8df)_mm512_setzero_pd(), \ 
2389309124Sdim (__mmask8)-1, (int)(R)); }) 2390296417Sdim 2391309124Sdim#define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \ 2392309124Sdim (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ 2393309124Sdim (__v8df)(__m512d)(B), \ 2394309124Sdim (__v8df)(__m512d)(W), (__mmask8)(U), \ 2395309124Sdim (int)(R)); }) 2396296417Sdim 2397309124Sdim#define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \ 2398309124Sdim (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ 2399309124Sdim (__v8df)(__m512d)(B), \ 2400309124Sdim (__v8df)_mm512_setzero_pd(), \ 2401309124Sdim (__mmask8)(U), (int)(R)); }) 2402296417Sdim 2403309124Sdim#define _mm512_mul_round_ps(A, B, R) __extension__ ({ \ 2404309124Sdim (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ 2405309124Sdim (__v16sf)(__m512)(B), \ 2406309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2407309124Sdim (__mmask16)-1, (int)(R)); }) 2408296417Sdim 2409309124Sdim#define _mm512_mask_mul_round_ps(W, U, A, B, R) __extension__ ({ \ 2410309124Sdim (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ 2411309124Sdim (__v16sf)(__m512)(B), \ 2412309124Sdim (__v16sf)(__m512)(W), (__mmask16)(U), \ 2413309124Sdim (int)(R)); }); 2414296417Sdim 2415309124Sdim#define _mm512_maskz_mul_round_ps(U, A, B, R) __extension__ ({ \ 2416309124Sdim (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ 2417309124Sdim (__v16sf)(__m512)(B), \ 2418309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2419309124Sdim (__mmask16)(U), (int)(R)); }); 2420296417Sdim 2421296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 2422296417Sdim_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 2423309124Sdim return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A, 2424296417Sdim (__v4sf) __B, 2425296417Sdim (__v4sf) __W, 2426296417Sdim (__mmask8) __U, 2427296417Sdim _MM_FROUND_CUR_DIRECTION); 2428296417Sdim} 2429296417Sdim 2430296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 
2431296417Sdim_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { 2432309124Sdim return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A, 2433296417Sdim (__v4sf) __B, 2434296417Sdim (__v4sf) _mm_setzero_ps (), 2435296417Sdim (__mmask8) __U, 2436296417Sdim _MM_FROUND_CUR_DIRECTION); 2437296417Sdim} 2438296417Sdim 2439309124Sdim#define _mm_div_round_ss(A, B, R) __extension__ ({ \ 2440309124Sdim (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2441309124Sdim (__v4sf)(__m128)(B), \ 2442309124Sdim (__v4sf)_mm_setzero_ps(), \ 2443309124Sdim (__mmask8)-1, (int)(R)); }) 2444296417Sdim 2445309124Sdim#define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \ 2446309124Sdim (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2447309124Sdim (__v4sf)(__m128)(B), \ 2448309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 2449309124Sdim (int)(R)); }) 2450296417Sdim 2451309124Sdim#define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \ 2452309124Sdim (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2453309124Sdim (__v4sf)(__m128)(B), \ 2454309124Sdim (__v4sf)_mm_setzero_ps(), \ 2455309124Sdim (__mmask8)(U), (int)(R)); }) 2456296417Sdim 2457296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 2458296417Sdim_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 2459309124Sdim return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A, 2460296417Sdim (__v2df) __B, 2461296417Sdim (__v2df) __W, 2462296417Sdim (__mmask8) __U, 2463296417Sdim _MM_FROUND_CUR_DIRECTION); 2464296417Sdim} 2465296417Sdim 2466296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 2467296417Sdim_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { 2468309124Sdim return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A, 2469296417Sdim (__v2df) __B, 2470296417Sdim (__v2df) _mm_setzero_pd (), 2471296417Sdim (__mmask8) __U, 2472296417Sdim _MM_FROUND_CUR_DIRECTION); 2473296417Sdim} 2474296417Sdim 2475309124Sdim#define 
_mm_div_round_sd(A, B, R) __extension__ ({ \ 2476309124Sdim (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2477309124Sdim (__v2df)(__m128d)(B), \ 2478309124Sdim (__v2df)_mm_setzero_pd(), \ 2479309124Sdim (__mmask8)-1, (int)(R)); }) 2480296417Sdim 2481309124Sdim#define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \ 2482309124Sdim (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2483309124Sdim (__v2df)(__m128d)(B), \ 2484309124Sdim (__v2df)(__m128d)(W), \ 2485309124Sdim (__mmask8)(U), (int)(R)); }) 2486296417Sdim 2487309124Sdim#define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \ 2488309124Sdim (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2489309124Sdim (__v2df)(__m128d)(B), \ 2490309124Sdim (__v2df)_mm_setzero_pd(), \ 2491309124Sdim (__mmask8)(U), (int)(R)); }) 2492296417Sdim 2493309124Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 2494309124Sdim_mm512_div_pd(__m512d __a, __m512d __b) 2495309124Sdim{ 2496309124Sdim return (__m512d)((__v8df)__a/(__v8df)__b); 2497309124Sdim} 2498309124Sdim 2499296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2500296417Sdim_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2501314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2502314564Sdim (__v8df)_mm512_div_pd(__A, __B), 2503314564Sdim (__v8df)__W); 2504296417Sdim} 2505296417Sdim 2506296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2507296417Sdim_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2508314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2509314564Sdim (__v8df)_mm512_div_pd(__A, __B), 2510314564Sdim (__v8df)_mm512_setzero_pd()); 2511296417Sdim} 2512296417Sdim 2513309124Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 2514309124Sdim_mm512_div_ps(__m512 __a, __m512 __b) 2515309124Sdim{ 2516309124Sdim return (__m512)((__v16sf)__a/(__v16sf)__b); 2517309124Sdim} 2518309124Sdim 2519296417Sdimstatic __inline__ __m512 
__DEFAULT_FN_ATTRS 2520296417Sdim_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2521314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2522314564Sdim (__v16sf)_mm512_div_ps(__A, __B), 2523314564Sdim (__v16sf)__W); 2524296417Sdim} 2525296417Sdim 2526296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2527296417Sdim_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2528314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2529314564Sdim (__v16sf)_mm512_div_ps(__A, __B), 2530314564Sdim (__v16sf)_mm512_setzero_ps()); 2531296417Sdim} 2532296417Sdim 2533309124Sdim#define _mm512_div_round_pd(A, B, R) __extension__ ({ \ 2534309124Sdim (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \ 2535309124Sdim (__v8df)(__m512d)(B), \ 2536309124Sdim (__v8df)_mm512_setzero_pd(), \ 2537309124Sdim (__mmask8)-1, (int)(R)); }) 2538296417Sdim 2539309124Sdim#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \ 2540309124Sdim (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \ 2541309124Sdim (__v8df)(__m512d)(B), \ 2542309124Sdim (__v8df)(__m512d)(W), (__mmask8)(U), \ 2543309124Sdim (int)(R)); }) 2544296417Sdim 2545309124Sdim#define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \ 2546309124Sdim (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \ 2547309124Sdim (__v8df)(__m512d)(B), \ 2548309124Sdim (__v8df)_mm512_setzero_pd(), \ 2549309124Sdim (__mmask8)(U), (int)(R)); }) 2550296417Sdim 2551309124Sdim#define _mm512_div_round_ps(A, B, R) __extension__ ({ \ 2552309124Sdim (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \ 2553309124Sdim (__v16sf)(__m512)(B), \ 2554309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2555309124Sdim (__mmask16)-1, (int)(R)); }) 2556296417Sdim 2557309124Sdim#define _mm512_mask_div_round_ps(W, U, A, B, R) __extension__ ({ \ 2558309124Sdim (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \ 2559309124Sdim (__v16sf)(__m512)(B), \ 
2560309124Sdim (__v16sf)(__m512)(W), (__mmask16)(U), \ 2561309124Sdim (int)(R)); }); 2562296417Sdim 2563309124Sdim#define _mm512_maskz_div_round_ps(U, A, B, R) __extension__ ({ \ 2564309124Sdim (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \ 2565309124Sdim (__v16sf)(__m512)(B), \ 2566309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2567309124Sdim (__mmask16)(U), (int)(R)); }); 2568296417Sdim 2569288943Sdim#define _mm512_roundscale_ps(A, B) __extension__ ({ \ 2570309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \ 2571309124Sdim (__v16sf)(__m512)(A), (__mmask16)-1, \ 2572309124Sdim _MM_FROUND_CUR_DIRECTION); }) 2573288943Sdim 2574309124Sdim#define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({\ 2575309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ 2576309124Sdim (__v16sf)(__m512)(A), (__mmask16)(B), \ 2577309124Sdim _MM_FROUND_CUR_DIRECTION); }) 2578309124Sdim 2579309124Sdim#define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({\ 2580309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ 2581309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2582309124Sdim (__mmask16)(A), \ 2583309124Sdim _MM_FROUND_CUR_DIRECTION); }) 2584309124Sdim 2585309124Sdim#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \ 2586309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ 2587309124Sdim (__v16sf)(__m512)(A), (__mmask16)(B), \ 2588309124Sdim (int)(R)); }) 2589309124Sdim 2590309124Sdim#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \ 2591309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ 2592309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2593309124Sdim (__mmask16)(A), (int)(R)); }) 2594309124Sdim 2595309124Sdim#define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \ 2596309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \ 
2597309124Sdim (__v16sf)_mm512_undefined_ps(), \ 2598309124Sdim (__mmask16)-1, (int)(R)); }) 2599309124Sdim 2600288943Sdim#define _mm512_roundscale_pd(A, B) __extension__ ({ \ 2601309124Sdim (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \ 2602309124Sdim (__v8df)(__m512d)(A), (__mmask8)-1, \ 2603309124Sdim _MM_FROUND_CUR_DIRECTION); }) 2604288943Sdim 2605309124Sdim#define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({\ 2606309124Sdim (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ 2607309124Sdim (__v8df)(__m512d)(A), (__mmask8)(B), \ 2608309124Sdim _MM_FROUND_CUR_DIRECTION); }) 2609309124Sdim 2610309124Sdim#define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({\ 2611309124Sdim (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ 2612309124Sdim (__v8df)_mm512_setzero_pd(), \ 2613309124Sdim (__mmask8)(A), \ 2614309124Sdim _MM_FROUND_CUR_DIRECTION); }) 2615309124Sdim 2616309124Sdim#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \ 2617309124Sdim (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ 2618309124Sdim (__v8df)(__m512d)(A), (__mmask8)(B), \ 2619309124Sdim (int)(R)); }) 2620309124Sdim 2621309124Sdim#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \ 2622309124Sdim (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ 2623309124Sdim (__v8df)_mm512_setzero_pd(), \ 2624309124Sdim (__mmask8)(A), (int)(R)); }) 2625309124Sdim 2626309124Sdim#define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \ 2627309124Sdim (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \ 2628309124Sdim (__v8df)_mm512_undefined_pd(), \ 2629309124Sdim (__mmask8)-1, (int)(R)); }) 2630309124Sdim 2631288943Sdim#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \ 2632309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2633309124Sdim 
(__v8df)(__m512d)(B), \ 2634309124Sdim (__v8df)(__m512d)(C), (__mmask8)-1, \ 2635309124Sdim (int)(R)); }) 2636288943Sdim 2637288943Sdim 2638288943Sdim#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \ 2639309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2640309124Sdim (__v8df)(__m512d)(B), \ 2641309124Sdim (__v8df)(__m512d)(C), \ 2642309124Sdim (__mmask8)(U), (int)(R)); }) 2643288943Sdim 2644288943Sdim 2645288943Sdim#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \ 2646309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \ 2647309124Sdim (__v8df)(__m512d)(B), \ 2648309124Sdim (__v8df)(__m512d)(C), \ 2649309124Sdim (__mmask8)(U), (int)(R)); }) 2650288943Sdim 2651288943Sdim 2652288943Sdim#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \ 2653309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ 2654309124Sdim (__v8df)(__m512d)(B), \ 2655309124Sdim (__v8df)(__m512d)(C), \ 2656309124Sdim (__mmask8)(U), (int)(R)); }) 2657288943Sdim 2658288943Sdim 2659288943Sdim#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \ 2660309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2661309124Sdim (__v8df)(__m512d)(B), \ 2662309124Sdim -(__v8df)(__m512d)(C), \ 2663309124Sdim (__mmask8)-1, (int)(R)); }) 2664288943Sdim 2665288943Sdim 2666288943Sdim#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \ 2667309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2668309124Sdim (__v8df)(__m512d)(B), \ 2669309124Sdim -(__v8df)(__m512d)(C), \ 2670309124Sdim (__mmask8)(U), (int)(R)); }) 2671288943Sdim 2672288943Sdim 2673288943Sdim#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \ 2674309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ 2675309124Sdim (__v8df)(__m512d)(B), \ 2676309124Sdim -(__v8df)(__m512d)(C), \ 2677309124Sdim (__mmask8)(U), (int)(R)); }) 
2678288943Sdim 2679288943Sdim 2680288943Sdim#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \ 2681309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ 2682309124Sdim (__v8df)(__m512d)(B), \ 2683309124Sdim (__v8df)(__m512d)(C), (__mmask8)-1, \ 2684309124Sdim (int)(R)); }) 2685288943Sdim 2686288943Sdim 2687288943Sdim#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \ 2688309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \ 2689309124Sdim (__v8df)(__m512d)(B), \ 2690309124Sdim (__v8df)(__m512d)(C), \ 2691309124Sdim (__mmask8)(U), (int)(R)); }) 2692288943Sdim 2693288943Sdim 2694288943Sdim#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \ 2695309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ 2696309124Sdim (__v8df)(__m512d)(B), \ 2697309124Sdim (__v8df)(__m512d)(C), \ 2698309124Sdim (__mmask8)(U), (int)(R)); }) 2699288943Sdim 2700288943Sdim 2701288943Sdim#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \ 2702309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ 2703309124Sdim (__v8df)(__m512d)(B), \ 2704309124Sdim -(__v8df)(__m512d)(C), \ 2705309124Sdim (__mmask8)-1, (int)(R)); }) 2706288943Sdim 2707288943Sdim 2708288943Sdim#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \ 2709309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ 2710309124Sdim (__v8df)(__m512d)(B), \ 2711309124Sdim -(__v8df)(__m512d)(C), \ 2712309124Sdim (__mmask8)(U), (int)(R)); }) 2713288943Sdim 2714288943Sdim 2715288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2716288943Sdim_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) 2717277325Sdim{ 2718288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2719288943Sdim (__v8df) __B, 2720288943Sdim (__v8df) __C, 2721288943Sdim (__mmask8) -1, 2722288943Sdim _MM_FROUND_CUR_DIRECTION); 2723277325Sdim} 2724288943Sdim 
2725288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2726288943Sdim_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 2727277325Sdim{ 2728288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2729288943Sdim (__v8df) __B, 2730288943Sdim (__v8df) __C, 2731288943Sdim (__mmask8) __U, 2732288943Sdim _MM_FROUND_CUR_DIRECTION); 2733277325Sdim} 2734277325Sdim 2735288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2736288943Sdim_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 2737277325Sdim{ 2738288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, 2739288943Sdim (__v8df) __B, 2740288943Sdim (__v8df) __C, 2741288943Sdim (__mmask8) __U, 2742288943Sdim _MM_FROUND_CUR_DIRECTION); 2743277325Sdim} 2744277325Sdim 2745288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2746288943Sdim_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2747288943Sdim{ 2748288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 2749288943Sdim (__v8df) __B, 2750288943Sdim (__v8df) __C, 2751288943Sdim (__mmask8) __U, 2752288943Sdim _MM_FROUND_CUR_DIRECTION); 2753288943Sdim} 2754288943Sdim 2755288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2756277325Sdim_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) 2757277325Sdim{ 2758288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2759288943Sdim (__v8df) __B, 2760288943Sdim -(__v8df) __C, 2761288943Sdim (__mmask8) -1, 2762288943Sdim _MM_FROUND_CUR_DIRECTION); 2763277325Sdim} 2764277325Sdim 2765288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2766288943Sdim_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 2767288943Sdim{ 2768288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2769288943Sdim (__v8df) __B, 2770288943Sdim -(__v8df) __C, 2771288943Sdim (__mmask8) __U, 2772288943Sdim _MM_FROUND_CUR_DIRECTION); 2773288943Sdim} 
2774288943Sdim 2775288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2776288943Sdim_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2777288943Sdim{ 2778288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 2779288943Sdim (__v8df) __B, 2780288943Sdim -(__v8df) __C, 2781288943Sdim (__mmask8) __U, 2782288943Sdim _MM_FROUND_CUR_DIRECTION); 2783288943Sdim} 2784288943Sdim 2785288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2786277325Sdim_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) 2787277325Sdim{ 2788288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 2789288943Sdim (__v8df) __B, 2790288943Sdim (__v8df) __C, 2791288943Sdim (__mmask8) -1, 2792288943Sdim _MM_FROUND_CUR_DIRECTION); 2793277325Sdim} 2794277325Sdim 2795288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2796288943Sdim_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 2797288943Sdim{ 2798288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, 2799288943Sdim (__v8df) __B, 2800288943Sdim (__v8df) __C, 2801288943Sdim (__mmask8) __U, 2802288943Sdim _MM_FROUND_CUR_DIRECTION); 2803288943Sdim} 2804288943Sdim 2805288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2806288943Sdim_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2807288943Sdim{ 2808288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 2809288943Sdim (__v8df) __B, 2810288943Sdim (__v8df) __C, 2811288943Sdim (__mmask8) __U, 2812288943Sdim _MM_FROUND_CUR_DIRECTION); 2813288943Sdim} 2814288943Sdim 2815288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2816288943Sdim_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) 2817288943Sdim{ 2818288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 2819288943Sdim (__v8df) __B, 2820288943Sdim -(__v8df) __C, 2821288943Sdim (__mmask8) -1, 2822288943Sdim _MM_FROUND_CUR_DIRECTION); 2823288943Sdim} 
2824288943Sdim 2825288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2826288943Sdim_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2827288943Sdim{ 2828288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 2829288943Sdim (__v8df) __B, 2830288943Sdim -(__v8df) __C, 2831288943Sdim (__mmask8) __U, 2832288943Sdim _MM_FROUND_CUR_DIRECTION); 2833288943Sdim} 2834288943Sdim 2835288943Sdim#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \ 2836309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2837309124Sdim (__v16sf)(__m512)(B), \ 2838309124Sdim (__v16sf)(__m512)(C), (__mmask16)-1, \ 2839309124Sdim (int)(R)); }) 2840288943Sdim 2841288943Sdim 2842288943Sdim#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \ 2843309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2844309124Sdim (__v16sf)(__m512)(B), \ 2845309124Sdim (__v16sf)(__m512)(C), \ 2846309124Sdim (__mmask16)(U), (int)(R)); }) 2847288943Sdim 2848288943Sdim 2849288943Sdim#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \ 2850309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \ 2851309124Sdim (__v16sf)(__m512)(B), \ 2852309124Sdim (__v16sf)(__m512)(C), \ 2853309124Sdim (__mmask16)(U), (int)(R)); }) 2854288943Sdim 2855288943Sdim 2856288943Sdim#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \ 2857309124Sdim (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ 2858309124Sdim (__v16sf)(__m512)(B), \ 2859309124Sdim (__v16sf)(__m512)(C), \ 2860309124Sdim (__mmask16)(U), (int)(R)); }) 2861288943Sdim 2862288943Sdim 2863288943Sdim#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \ 2864309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2865309124Sdim (__v16sf)(__m512)(B), \ 2866309124Sdim -(__v16sf)(__m512)(C), \ 2867309124Sdim (__mmask16)-1, (int)(R)); }) 2868288943Sdim 2869288943Sdim 
2870288943Sdim#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \ 2871309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2872309124Sdim (__v16sf)(__m512)(B), \ 2873309124Sdim -(__v16sf)(__m512)(C), \ 2874309124Sdim (__mmask16)(U), (int)(R)); }) 2875288943Sdim 2876288943Sdim 2877288943Sdim#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \ 2878309124Sdim (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ 2879309124Sdim (__v16sf)(__m512)(B), \ 2880309124Sdim -(__v16sf)(__m512)(C), \ 2881309124Sdim (__mmask16)(U), (int)(R)); }) 2882288943Sdim 2883288943Sdim 2884288943Sdim#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \ 2885309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \ 2886309124Sdim (__v16sf)(__m512)(B), \ 2887309124Sdim (__v16sf)(__m512)(C), (__mmask16)-1, \ 2888309124Sdim (int)(R)); }) 2889288943Sdim 2890288943Sdim 2891288943Sdim#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \ 2892309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \ 2893309124Sdim (__v16sf)(__m512)(B), \ 2894309124Sdim (__v16sf)(__m512)(C), \ 2895309124Sdim (__mmask16)(U), (int)(R)); }) 2896288943Sdim 2897288943Sdim 2898288943Sdim#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \ 2899309124Sdim (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ 2900309124Sdim (__v16sf)(__m512)(B), \ 2901309124Sdim (__v16sf)(__m512)(C), \ 2902309124Sdim (__mmask16)(U), (int)(R)); }) 2903288943Sdim 2904288943Sdim 2905288943Sdim#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \ 2906309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \ 2907309124Sdim (__v16sf)(__m512)(B), \ 2908309124Sdim -(__v16sf)(__m512)(C), \ 2909309124Sdim (__mmask16)-1, (int)(R)); }) 2910288943Sdim 2911288943Sdim 2912288943Sdim#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \ 2913309124Sdim 
(__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ 2914309124Sdim (__v16sf)(__m512)(B), \ 2915309124Sdim -(__v16sf)(__m512)(C), \ 2916309124Sdim (__mmask16)(U), (int)(R)); }) 2917288943Sdim 2918288943Sdim 2919288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2920277325Sdim_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) 2921277325Sdim{ 2922288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2923288943Sdim (__v16sf) __B, 2924288943Sdim (__v16sf) __C, 2925288943Sdim (__mmask16) -1, 2926288943Sdim _MM_FROUND_CUR_DIRECTION); 2927277325Sdim} 2928277325Sdim 2929288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2930288943Sdim_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 2931288943Sdim{ 2932288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2933288943Sdim (__v16sf) __B, 2934288943Sdim (__v16sf) __C, 2935288943Sdim (__mmask16) __U, 2936288943Sdim _MM_FROUND_CUR_DIRECTION); 2937288943Sdim} 2938288943Sdim 2939288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2940288943Sdim_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 2941288943Sdim{ 2942288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, 2943288943Sdim (__v16sf) __B, 2944288943Sdim (__v16sf) __C, 2945288943Sdim (__mmask16) __U, 2946288943Sdim _MM_FROUND_CUR_DIRECTION); 2947288943Sdim} 2948288943Sdim 2949288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2950288943Sdim_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2951288943Sdim{ 2952288943Sdim return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 2953288943Sdim (__v16sf) __B, 2954288943Sdim (__v16sf) __C, 2955288943Sdim (__mmask16) __U, 2956288943Sdim _MM_FROUND_CUR_DIRECTION); 2957288943Sdim} 2958288943Sdim 2959288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2960277325Sdim_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) 2961277325Sdim{ 2962288943Sdim return (__m512) 
__builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2963288943Sdim (__v16sf) __B, 2964288943Sdim -(__v16sf) __C, 2965288943Sdim (__mmask16) -1, 2966288943Sdim _MM_FROUND_CUR_DIRECTION); 2967277325Sdim} 2968277325Sdim 2969288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2970288943Sdim_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 2971288943Sdim{ 2972288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2973288943Sdim (__v16sf) __B, 2974288943Sdim -(__v16sf) __C, 2975288943Sdim (__mmask16) __U, 2976288943Sdim _MM_FROUND_CUR_DIRECTION); 2977288943Sdim} 2978288943Sdim 2979288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2980288943Sdim_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2981288943Sdim{ 2982288943Sdim return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 2983288943Sdim (__v16sf) __B, 2984288943Sdim -(__v16sf) __C, 2985288943Sdim (__mmask16) __U, 2986288943Sdim _MM_FROUND_CUR_DIRECTION); 2987288943Sdim} 2988288943Sdim 2989288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2990277325Sdim_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) 2991277325Sdim{ 2992288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 2993288943Sdim (__v16sf) __B, 2994288943Sdim (__v16sf) __C, 2995288943Sdim (__mmask16) -1, 2996288943Sdim _MM_FROUND_CUR_DIRECTION); 2997277325Sdim} 2998277325Sdim 2999288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3000288943Sdim_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 3001288943Sdim{ 3002288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, 3003288943Sdim (__v16sf) __B, 3004288943Sdim (__v16sf) __C, 3005288943Sdim (__mmask16) __U, 3006288943Sdim _MM_FROUND_CUR_DIRECTION); 3007288943Sdim} 3008288943Sdim 3009288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3010288943Sdim_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 3011288943Sdim{ 3012288943Sdim 
return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 3013288943Sdim (__v16sf) __B, 3014288943Sdim (__v16sf) __C, 3015288943Sdim (__mmask16) __U, 3016288943Sdim _MM_FROUND_CUR_DIRECTION); 3017288943Sdim} 3018288943Sdim 3019288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3020288943Sdim_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) 3021288943Sdim{ 3022288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 3023288943Sdim (__v16sf) __B, 3024288943Sdim -(__v16sf) __C, 3025288943Sdim (__mmask16) -1, 3026288943Sdim _MM_FROUND_CUR_DIRECTION); 3027288943Sdim} 3028288943Sdim 3029288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3030288943Sdim_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 3031288943Sdim{ 3032288943Sdim return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 3033288943Sdim (__v16sf) __B, 3034288943Sdim -(__v16sf) __C, 3035288943Sdim (__mmask16) __U, 3036288943Sdim _MM_FROUND_CUR_DIRECTION); 3037288943Sdim} 3038288943Sdim 3039288943Sdim#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \ 3040309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 3041309124Sdim (__v8df)(__m512d)(B), \ 3042309124Sdim (__v8df)(__m512d)(C), \ 3043309124Sdim (__mmask8)-1, (int)(R)); }) 3044288943Sdim 3045288943Sdim 3046288943Sdim#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \ 3047309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 3048309124Sdim (__v8df)(__m512d)(B), \ 3049309124Sdim (__v8df)(__m512d)(C), \ 3050309124Sdim (__mmask8)(U), (int)(R)); }) 3051288943Sdim 3052288943Sdim 3053288943Sdim#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \ 3054309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \ 3055309124Sdim (__v8df)(__m512d)(B), \ 3056309124Sdim (__v8df)(__m512d)(C), \ 3057309124Sdim (__mmask8)(U), (int)(R)); }) 3058288943Sdim 3059288943Sdim 
3060288943Sdim#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \ 3061309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ 3062309124Sdim (__v8df)(__m512d)(B), \ 3063309124Sdim (__v8df)(__m512d)(C), \ 3064309124Sdim (__mmask8)(U), (int)(R)); }) 3065288943Sdim 3066288943Sdim 3067288943Sdim#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \ 3068309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 3069309124Sdim (__v8df)(__m512d)(B), \ 3070309124Sdim -(__v8df)(__m512d)(C), \ 3071309124Sdim (__mmask8)-1, (int)(R)); }) 3072288943Sdim 3073288943Sdim 3074288943Sdim#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \ 3075309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 3076309124Sdim (__v8df)(__m512d)(B), \ 3077309124Sdim -(__v8df)(__m512d)(C), \ 3078309124Sdim (__mmask8)(U), (int)(R)); }) 3079288943Sdim 3080288943Sdim 3081288943Sdim#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \ 3082309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ 3083309124Sdim (__v8df)(__m512d)(B), \ 3084309124Sdim -(__v8df)(__m512d)(C), \ 3085309124Sdim (__mmask8)(U), (int)(R)); }) 3086288943Sdim 3087288943Sdim 3088288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3089288943Sdim_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) 3090288943Sdim{ 3091288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3092288943Sdim (__v8df) __B, 3093288943Sdim (__v8df) __C, 3094288943Sdim (__mmask8) -1, 3095288943Sdim _MM_FROUND_CUR_DIRECTION); 3096288943Sdim} 3097288943Sdim 3098288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3099288943Sdim_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 3100288943Sdim{ 3101288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3102288943Sdim (__v8df) __B, 3103288943Sdim (__v8df) __C, 3104288943Sdim 
(__mmask8) __U, 3105288943Sdim _MM_FROUND_CUR_DIRECTION); 3106288943Sdim} 3107288943Sdim 3108288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3109288943Sdim_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 3110288943Sdim{ 3111288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, 3112288943Sdim (__v8df) __B, 3113288943Sdim (__v8df) __C, 3114288943Sdim (__mmask8) __U, 3115288943Sdim _MM_FROUND_CUR_DIRECTION); 3116288943Sdim} 3117288943Sdim 3118288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3119288943Sdim_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 3120288943Sdim{ 3121288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 3122288943Sdim (__v8df) __B, 3123288943Sdim (__v8df) __C, 3124288943Sdim (__mmask8) __U, 3125288943Sdim _MM_FROUND_CUR_DIRECTION); 3126288943Sdim} 3127288943Sdim 3128288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3129288943Sdim_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) 3130288943Sdim{ 3131288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3132288943Sdim (__v8df) __B, 3133288943Sdim -(__v8df) __C, 3134288943Sdim (__mmask8) -1, 3135288943Sdim _MM_FROUND_CUR_DIRECTION); 3136288943Sdim} 3137288943Sdim 3138288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3139288943Sdim_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 3140288943Sdim{ 3141288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3142288943Sdim (__v8df) __B, 3143288943Sdim -(__v8df) __C, 3144288943Sdim (__mmask8) __U, 3145288943Sdim _MM_FROUND_CUR_DIRECTION); 3146288943Sdim} 3147288943Sdim 3148288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3149288943Sdim_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 3150288943Sdim{ 3151288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 3152288943Sdim (__v8df) __B, 
3153288943Sdim -(__v8df) __C, 3154288943Sdim (__mmask8) __U, 3155288943Sdim _MM_FROUND_CUR_DIRECTION); 3156288943Sdim} 3157288943Sdim 3158288943Sdim#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \ 3159309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 3160309124Sdim (__v16sf)(__m512)(B), \ 3161309124Sdim (__v16sf)(__m512)(C), \ 3162309124Sdim (__mmask16)-1, (int)(R)); }) 3163288943Sdim 3164288943Sdim 3165288943Sdim#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \ 3166309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 3167309124Sdim (__v16sf)(__m512)(B), \ 3168309124Sdim (__v16sf)(__m512)(C), \ 3169309124Sdim (__mmask16)(U), (int)(R)); }) 3170288943Sdim 3171288943Sdim 3172288943Sdim#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \ 3173309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \ 3174309124Sdim (__v16sf)(__m512)(B), \ 3175309124Sdim (__v16sf)(__m512)(C), \ 3176309124Sdim (__mmask16)(U), (int)(R)); }) 3177288943Sdim 3178288943Sdim 3179288943Sdim#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \ 3180309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ 3181309124Sdim (__v16sf)(__m512)(B), \ 3182309124Sdim (__v16sf)(__m512)(C), \ 3183309124Sdim (__mmask16)(U), (int)(R)); }) 3184288943Sdim 3185288943Sdim 3186288943Sdim#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \ 3187309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 3188309124Sdim (__v16sf)(__m512)(B), \ 3189309124Sdim -(__v16sf)(__m512)(C), \ 3190309124Sdim (__mmask16)-1, (int)(R)); }) 3191288943Sdim 3192288943Sdim 3193288943Sdim#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \ 3194309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 3195309124Sdim (__v16sf)(__m512)(B), \ 3196309124Sdim -(__v16sf)(__m512)(C), \ 3197309124Sdim 
(__mmask16)(U), (int)(R)); }) 3198288943Sdim 3199288943Sdim 3200288943Sdim#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \ 3201309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ 3202309124Sdim (__v16sf)(__m512)(B), \ 3203309124Sdim -(__v16sf)(__m512)(C), \ 3204309124Sdim (__mmask16)(U), (int)(R)); }) 3205288943Sdim 3206288943Sdim 3207288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3208288943Sdim_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) 3209288943Sdim{ 3210288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3211288943Sdim (__v16sf) __B, 3212288943Sdim (__v16sf) __C, 3213288943Sdim (__mmask16) -1, 3214288943Sdim _MM_FROUND_CUR_DIRECTION); 3215288943Sdim} 3216288943Sdim 3217288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3218288943Sdim_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 3219288943Sdim{ 3220288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3221288943Sdim (__v16sf) __B, 3222288943Sdim (__v16sf) __C, 3223288943Sdim (__mmask16) __U, 3224288943Sdim _MM_FROUND_CUR_DIRECTION); 3225288943Sdim} 3226288943Sdim 3227288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3228288943Sdim_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 3229288943Sdim{ 3230288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, 3231288943Sdim (__v16sf) __B, 3232288943Sdim (__v16sf) __C, 3233288943Sdim (__mmask16) __U, 3234288943Sdim _MM_FROUND_CUR_DIRECTION); 3235288943Sdim} 3236288943Sdim 3237288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3238288943Sdim_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 3239288943Sdim{ 3240288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 3241288943Sdim (__v16sf) __B, 3242288943Sdim (__v16sf) __C, 3243288943Sdim (__mmask16) __U, 3244288943Sdim _MM_FROUND_CUR_DIRECTION); 3245288943Sdim} 
3246288943Sdim 3247288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3248288943Sdim_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) 3249288943Sdim{ 3250288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3251288943Sdim (__v16sf) __B, 3252288943Sdim -(__v16sf) __C, 3253288943Sdim (__mmask16) -1, 3254288943Sdim _MM_FROUND_CUR_DIRECTION); 3255288943Sdim} 3256288943Sdim 3257288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3258288943Sdim_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 3259288943Sdim{ 3260288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3261288943Sdim (__v16sf) __B, 3262288943Sdim -(__v16sf) __C, 3263288943Sdim (__mmask16) __U, 3264288943Sdim _MM_FROUND_CUR_DIRECTION); 3265288943Sdim} 3266288943Sdim 3267288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3268288943Sdim_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 3269288943Sdim{ 3270288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 3271288943Sdim (__v16sf) __B, 3272288943Sdim -(__v16sf) __C, 3273288943Sdim (__mmask16) __U, 3274288943Sdim _MM_FROUND_CUR_DIRECTION); 3275288943Sdim} 3276288943Sdim 3277288943Sdim#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \ 3278309124Sdim (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \ 3279309124Sdim (__v8df)(__m512d)(B), \ 3280309124Sdim (__v8df)(__m512d)(C), \ 3281309124Sdim (__mmask8)(U), (int)(R)); }) 3282288943Sdim 3283288943Sdim 3284288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3285288943Sdim_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 3286288943Sdim{ 3287288943Sdim return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, 3288288943Sdim (__v8df) __B, 3289288943Sdim (__v8df) __C, 3290288943Sdim (__mmask8) __U, 3291288943Sdim _MM_FROUND_CUR_DIRECTION); 3292288943Sdim} 3293288943Sdim 3294288943Sdim#define 
_mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \ 3295309124Sdim (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \ 3296309124Sdim (__v16sf)(__m512)(B), \ 3297309124Sdim (__v16sf)(__m512)(C), \ 3298309124Sdim (__mmask16)(U), (int)(R)); }) 3299288943Sdim 3300288943Sdim 3301288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3302288943Sdim_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 3303288943Sdim{ 3304288943Sdim return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, 3305288943Sdim (__v16sf) __B, 3306288943Sdim (__v16sf) __C, 3307288943Sdim (__mmask16) __U, 3308288943Sdim _MM_FROUND_CUR_DIRECTION); 3309288943Sdim} 3310288943Sdim 3311288943Sdim#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \ 3312309124Sdim (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \ 3313309124Sdim (__v8df)(__m512d)(B), \ 3314309124Sdim (__v8df)(__m512d)(C), \ 3315309124Sdim (__mmask8)(U), (int)(R)); }) 3316288943Sdim 3317288943Sdim 3318288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3319288943Sdim_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 3320288943Sdim{ 3321288943Sdim return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, 3322288943Sdim (__v8df) __B, 3323288943Sdim (__v8df) __C, 3324288943Sdim (__mmask8) __U, 3325288943Sdim _MM_FROUND_CUR_DIRECTION); 3326288943Sdim} 3327288943Sdim 3328288943Sdim#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \ 3329309124Sdim (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \ 3330309124Sdim (__v16sf)(__m512)(B), \ 3331309124Sdim (__v16sf)(__m512)(C), \ 3332309124Sdim (__mmask16)(U), (int)(R)); }) 3333288943Sdim 3334288943Sdim 3335288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3336288943Sdim_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 3337288943Sdim{ 3338288943Sdim return (__m512) __builtin_ia32_vfmsubaddps512_mask3 
((__v16sf) __A, 3339288943Sdim (__v16sf) __B, 3340288943Sdim (__v16sf) __C, 3341288943Sdim (__mmask16) __U, 3342288943Sdim _MM_FROUND_CUR_DIRECTION); 3343288943Sdim} 3344288943Sdim 3345288943Sdim#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \ 3346309124Sdim (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \ 3347309124Sdim (__v8df)(__m512d)(B), \ 3348309124Sdim (__v8df)(__m512d)(C), \ 3349309124Sdim (__mmask8)(U), (int)(R)); }) 3350288943Sdim 3351288943Sdim 3352288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3353288943Sdim_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 3354288943Sdim{ 3355288943Sdim return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, 3356288943Sdim (__v8df) __B, 3357288943Sdim (__v8df) __C, 3358288943Sdim (__mmask8) __U, 3359288943Sdim _MM_FROUND_CUR_DIRECTION); 3360288943Sdim} 3361288943Sdim 3362288943Sdim#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \ 3363309124Sdim (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \ 3364309124Sdim (__v16sf)(__m512)(B), \ 3365309124Sdim (__v16sf)(__m512)(C), \ 3366309124Sdim (__mmask16)(U), (int)(R)); }) 3367288943Sdim 3368288943Sdim 3369288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3370288943Sdim_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 3371288943Sdim{ 3372288943Sdim return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, 3373288943Sdim (__v16sf) __B, 3374288943Sdim (__v16sf) __C, 3375288943Sdim (__mmask16) __U, 3376288943Sdim _MM_FROUND_CUR_DIRECTION); 3377288943Sdim} 3378288943Sdim 3379288943Sdim#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \ 3380309124Sdim (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \ 3381309124Sdim (__v8df)(__m512d)(B), \ 3382309124Sdim (__v8df)(__m512d)(C), \ 3383309124Sdim (__mmask8)(U), (int)(R)); }) 3384288943Sdim 3385288943Sdim 3386288943Sdim#define _mm512_mask3_fnmsub_round_pd(A, 
B, C, U, R) __extension__ ({ \ 3387309124Sdim (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \ 3388309124Sdim (__v8df)(__m512d)(B), \ 3389309124Sdim (__v8df)(__m512d)(C), \ 3390309124Sdim (__mmask8)(U), (int)(R)); }) 3391288943Sdim 3392288943Sdim 3393288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3394288943Sdim_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 3395288943Sdim{ 3396288943Sdim return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, 3397288943Sdim (__v8df) __B, 3398288943Sdim (__v8df) __C, 3399288943Sdim (__mmask8) __U, 3400288943Sdim _MM_FROUND_CUR_DIRECTION); 3401288943Sdim} 3402288943Sdim 3403288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3404288943Sdim_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 3405288943Sdim{ 3406288943Sdim return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, 3407288943Sdim (__v8df) __B, 3408288943Sdim (__v8df) __C, 3409288943Sdim (__mmask8) __U, 3410288943Sdim _MM_FROUND_CUR_DIRECTION); 3411288943Sdim} 3412288943Sdim 3413288943Sdim#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \ 3414309124Sdim (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \ 3415309124Sdim (__v16sf)(__m512)(B), \ 3416309124Sdim (__v16sf)(__m512)(C), \ 3417309124Sdim (__mmask16)(U), (int)(R)); }) 3418288943Sdim 3419288943Sdim 3420288943Sdim#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \ 3421309124Sdim (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \ 3422309124Sdim (__v16sf)(__m512)(B), \ 3423309124Sdim (__v16sf)(__m512)(C), \ 3424309124Sdim (__mmask16)(U), (int)(R)); }) 3425288943Sdim 3426288943Sdim 3427288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3428288943Sdim_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 3429288943Sdim{ 3430288943Sdim return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, 3431288943Sdim (__v16sf) __B, 
3432288943Sdim (__v16sf) __C, 3433288943Sdim (__mmask16) __U, 3434288943Sdim _MM_FROUND_CUR_DIRECTION); 3435288943Sdim} 3436288943Sdim 3437288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3438288943Sdim_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 3439288943Sdim{ 3440288943Sdim return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, 3441288943Sdim (__v16sf) __B, 3442288943Sdim (__v16sf) __C, 3443288943Sdim (__mmask16) __U, 3444288943Sdim _MM_FROUND_CUR_DIRECTION); 3445288943Sdim} 3446288943Sdim 3447288943Sdim 3448288943Sdim 3449277325Sdim/* Vector permutations */ 3450277325Sdim 3451288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 3452277325Sdim_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) 3453277325Sdim{ 3454277325Sdim return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I 3455277325Sdim /* idx */ , 3456277325Sdim (__v16si) __A, 3457277325Sdim (__v16si) __B, 3458277325Sdim (__mmask16) -1); 3459277325Sdim} 3460309124Sdim 3461309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 3462309124Sdim_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U, 3463309124Sdim __m512i __I, __m512i __B) 3464309124Sdim{ 3465309124Sdim return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I 3466309124Sdim /* idx */ , 3467309124Sdim (__v16si) __A, 3468309124Sdim (__v16si) __B, 3469309124Sdim (__mmask16) __U); 3470309124Sdim} 3471309124Sdim 3472309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 3473309124Sdim_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A, 3474309124Sdim __m512i __I, __m512i __B) 3475309124Sdim{ 3476309124Sdim return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I 3477309124Sdim /* idx */ , 3478309124Sdim (__v16si) __A, 3479309124Sdim (__v16si) __B, 3480309124Sdim (__mmask16) __U); 3481309124Sdim} 3482309124Sdim 3483288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 3484277325Sdim_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) 
3485277325Sdim{ 3486277325Sdim return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I 3487277325Sdim /* idx */ , 3488277325Sdim (__v8di) __A, 3489277325Sdim (__v8di) __B, 3490277325Sdim (__mmask8) -1); 3491277325Sdim} 3492277325Sdim 3493309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 3494309124Sdim_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I, 3495309124Sdim __m512i __B) 3496277325Sdim{ 3497309124Sdim return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I 3498309124Sdim /* idx */ , 3499309124Sdim (__v8di) __A, 3500309124Sdim (__v8di) __B, 3501309124Sdim (__mmask8) __U); 3502277325Sdim} 3503309124Sdim 3504309124Sdim 3505309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 3506309124Sdim_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A, 3507309124Sdim __m512i __I, __m512i __B) 3508277325Sdim{ 3509309124Sdim return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I 3510309124Sdim /* idx */ , 3511309124Sdim (__v8di) __A, 3512309124Sdim (__v8di) __B, 3513309124Sdim (__mmask8) __U); 3514277325Sdim} 3515277325Sdim 3516288943Sdim#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \ 3517314564Sdim (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \ 3518314564Sdim (__v8di)(__m512i)(A), \ 3519314564Sdim ((int)(I) & 0x7) + 0, \ 3520314564Sdim ((int)(I) & 0x7) + 1, \ 3521314564Sdim ((int)(I) & 0x7) + 2, \ 3522314564Sdim ((int)(I) & 0x7) + 3, \ 3523314564Sdim ((int)(I) & 0x7) + 4, \ 3524314564Sdim ((int)(I) & 0x7) + 5, \ 3525314564Sdim ((int)(I) & 0x7) + 6, \ 3526314564Sdim ((int)(I) & 0x7) + 7); }) 3527277325Sdim 3528309124Sdim#define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\ 3529314564Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 3530314564Sdim (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ 3531314564Sdim (__v8di)(__m512i)(W)); }) 3532309124Sdim 3533309124Sdim#define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\ 3534314564Sdim 
(__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 3535314564Sdim (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ 3536314564Sdim (__v8di)_mm512_setzero_si512()); }) 3537309124Sdim 3538288943Sdim#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \ 3539314564Sdim (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \ 3540314564Sdim (__v16si)(__m512i)(A), \ 3541314564Sdim ((int)(I) & 0xf) + 0, \ 3542314564Sdim ((int)(I) & 0xf) + 1, \ 3543314564Sdim ((int)(I) & 0xf) + 2, \ 3544314564Sdim ((int)(I) & 0xf) + 3, \ 3545314564Sdim ((int)(I) & 0xf) + 4, \ 3546314564Sdim ((int)(I) & 0xf) + 5, \ 3547314564Sdim ((int)(I) & 0xf) + 6, \ 3548314564Sdim ((int)(I) & 0xf) + 7, \ 3549314564Sdim ((int)(I) & 0xf) + 8, \ 3550314564Sdim ((int)(I) & 0xf) + 9, \ 3551314564Sdim ((int)(I) & 0xf) + 10, \ 3552314564Sdim ((int)(I) & 0xf) + 11, \ 3553314564Sdim ((int)(I) & 0xf) + 12, \ 3554314564Sdim ((int)(I) & 0xf) + 13, \ 3555314564Sdim ((int)(I) & 0xf) + 14, \ 3556314564Sdim ((int)(I) & 0xf) + 15); }) 3557277325Sdim 3558309124Sdim#define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\ 3559314564Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 3560314564Sdim (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ 3561314564Sdim (__v16si)(__m512i)(W)); }) 3562309124Sdim 3563309124Sdim#define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\ 3564314564Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 3565314564Sdim (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ 3566314564Sdim (__v16si)_mm512_setzero_si512()); }) 3567288943Sdim/* Vector Extract */ 3568288943Sdim 3569314564Sdim#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \ 3570314564Sdim (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A), \ 3571314564Sdim (__v8df)_mm512_undefined_pd(), \ 3572314564Sdim ((I) & 1) ? 4 : 0, \ 3573314564Sdim ((I) & 1) ? 5 : 1, \ 3574314564Sdim ((I) & 1) ? 6 : 2, \ 3575314564Sdim ((I) & 1) ? 
7 : 3); }) 3576288943Sdim 3577309124Sdim#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\ 3578314564Sdim (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 3579314564Sdim (__v4df)_mm512_extractf64x4_pd((A), (imm)), \ 3580314564Sdim (__v4df)(W)); }) 3581309124Sdim 3582309124Sdim#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\ 3583314564Sdim (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 3584314564Sdim (__v4df)_mm512_extractf64x4_pd((A), (imm)), \ 3585314564Sdim (__v4df)_mm256_setzero_pd()); }) 3586309124Sdim 3587314564Sdim#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \ 3588314564Sdim (__m128)__builtin_shufflevector((__v16sf)(__m512)(A), \ 3589314564Sdim (__v16sf)_mm512_undefined_ps(), \ 3590314564Sdim 0 + ((I) & 0x3) * 4, \ 3591314564Sdim 1 + ((I) & 0x3) * 4, \ 3592314564Sdim 2 + ((I) & 0x3) * 4, \ 3593314564Sdim 3 + ((I) & 0x3) * 4); }) 3594288943Sdim 3595309124Sdim#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\ 3596314564Sdim (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 3597314564Sdim (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \ 3598314564Sdim (__v4sf)(W)); }) 3599309124Sdim 3600309124Sdim#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\ 3601314564Sdim (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 3602314564Sdim (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \ 3603314564Sdim (__v4sf)_mm_setzero_ps()); }) 3604314564Sdim 3605277325Sdim/* Vector Blend */ 3606277325Sdim 3607288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 3608277325Sdim_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) 3609277325Sdim{ 3610309124Sdim return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, 3611277325Sdim (__v8df) __W, 3612309124Sdim (__v8df) __A); 3613277325Sdim} 3614277325Sdim 3615288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 3616277325Sdim_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) 3617277325Sdim{ 3618309124Sdim return (__m512) 
__builtin_ia32_selectps_512 ((__mmask16) __U, 3619277325Sdim (__v16sf) __W, 3620309124Sdim (__v16sf) __A); 3621277325Sdim} 3622277325Sdim 3623288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 3624277325Sdim_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) 3625277325Sdim{ 3626309124Sdim return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, 3627277325Sdim (__v8di) __W, 3628309124Sdim (__v8di) __A); 3629277325Sdim} 3630277325Sdim 3631288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 3632277325Sdim_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) 3633277325Sdim{ 3634309124Sdim return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, 3635277325Sdim (__v16si) __W, 3636309124Sdim (__v16si) __A); 3637277325Sdim} 3638277325Sdim 3639277325Sdim/* Compare */ 3640277325Sdim 3641288943Sdim#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \ 3642288943Sdim (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 3643309124Sdim (__v16sf)(__m512)(B), (int)(P), \ 3644309124Sdim (__mmask16)-1, (int)(R)); }) 3645277325Sdim 3646288943Sdim#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \ 3647288943Sdim (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 3648309124Sdim (__v16sf)(__m512)(B), (int)(P), \ 3649309124Sdim (__mmask16)(U), (int)(R)); }) 3650277325Sdim 3651288943Sdim#define _mm512_cmp_ps_mask(A, B, P) \ 3652288943Sdim _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3653288943Sdim#define _mm512_mask_cmp_ps_mask(U, A, B, P) \ 3654288943Sdim _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3655288943Sdim 3656314564Sdim#define _mm512_cmpeq_ps_mask(A, B) \ 3657314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ) 3658314564Sdim#define _mm512_mask_cmpeq_ps_mask(k, A, B) \ 3659314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ) 3660314564Sdim 3661314564Sdim#define _mm512_cmplt_ps_mask(A, B) \ 3662314564Sdim _mm512_cmp_ps_mask((A), (B), 
_CMP_LT_OS) 3663314564Sdim#define _mm512_mask_cmplt_ps_mask(k, A, B) \ 3664314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS) 3665314564Sdim 3666314564Sdim#define _mm512_cmple_ps_mask(A, B) \ 3667314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS) 3668314564Sdim#define _mm512_mask_cmple_ps_mask(k, A, B) \ 3669314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS) 3670314564Sdim 3671314564Sdim#define _mm512_cmpunord_ps_mask(A, B) \ 3672314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q) 3673314564Sdim#define _mm512_mask_cmpunord_ps_mask(k, A, B) \ 3674314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q) 3675314564Sdim 3676314564Sdim#define _mm512_cmpneq_ps_mask(A, B) \ 3677314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ) 3678314564Sdim#define _mm512_mask_cmpneq_ps_mask(k, A, B) \ 3679314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ) 3680314564Sdim 3681314564Sdim#define _mm512_cmpnlt_ps_mask(A, B) \ 3682314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US) 3683314564Sdim#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \ 3684314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US) 3685314564Sdim 3686314564Sdim#define _mm512_cmpnle_ps_mask(A, B) \ 3687314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US) 3688314564Sdim#define _mm512_mask_cmpnle_ps_mask(k, A, B) \ 3689314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US) 3690314564Sdim 3691314564Sdim#define _mm512_cmpord_ps_mask(A, B) \ 3692314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q) 3693314564Sdim#define _mm512_mask_cmpord_ps_mask(k, A, B) \ 3694314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q) 3695314564Sdim 3696288943Sdim#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \ 3697288943Sdim (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ 3698309124Sdim (__v8df)(__m512d)(B), (int)(P), \ 3699309124Sdim (__mmask8)-1, (int)(R)); }) 3700288943Sdim 3701288943Sdim#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) 
__extension__ ({ \ 3702288943Sdim (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ 3703309124Sdim (__v8df)(__m512d)(B), (int)(P), \ 3704309124Sdim (__mmask8)(U), (int)(R)); }) 3705288943Sdim 3706288943Sdim#define _mm512_cmp_pd_mask(A, B, P) \ 3707288943Sdim _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3708288943Sdim#define _mm512_mask_cmp_pd_mask(U, A, B, P) \ 3709288943Sdim _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3710288943Sdim 3711314564Sdim#define _mm512_cmpeq_pd_mask(A, B) \ 3712314564Sdim _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ) 3713314564Sdim#define _mm512_mask_cmpeq_pd_mask(k, A, B) \ 3714314564Sdim _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ) 3715314564Sdim 3716314564Sdim#define _mm512_cmplt_pd_mask(A, B) \ 3717314564Sdim _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS) 3718314564Sdim#define _mm512_mask_cmplt_pd_mask(k, A, B) \ 3719314564Sdim _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS) 3720314564Sdim 3721314564Sdim#define _mm512_cmple_pd_mask(A, B) \ 3722314564Sdim _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS) 3723314564Sdim#define _mm512_mask_cmple_pd_mask(k, A, B) \ 3724314564Sdim _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS) 3725314564Sdim 3726314564Sdim#define _mm512_cmpunord_pd_mask(A, B) \ 3727314564Sdim _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q) 3728314564Sdim#define _mm512_mask_cmpunord_pd_mask(k, A, B) \ 3729314564Sdim _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q) 3730314564Sdim 3731314564Sdim#define _mm512_cmpneq_pd_mask(A, B) \ 3732314564Sdim _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ) 3733314564Sdim#define _mm512_mask_cmpneq_pd_mask(k, A, B) \ 3734314564Sdim _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ) 3735314564Sdim 3736314564Sdim#define _mm512_cmpnlt_pd_mask(A, B) \ 3737314564Sdim _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US) 3738314564Sdim#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \ 3739314564Sdim _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US) 
3740314564Sdim 3741314564Sdim#define _mm512_cmpnle_pd_mask(A, B) \ 3742314564Sdim _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US) 3743314564Sdim#define _mm512_mask_cmpnle_pd_mask(k, A, B) \ 3744314564Sdim _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US) 3745314564Sdim 3746314564Sdim#define _mm512_cmpord_pd_mask(A, B) \ 3747314564Sdim _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q) 3748314564Sdim#define _mm512_mask_cmpord_pd_mask(k, A, B) \ 3749314564Sdim _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q) 3750314564Sdim 3751277325Sdim/* Conversion */ 3752277325Sdim 3753309124Sdim#define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \ 3754309124Sdim (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ 3755309124Sdim (__v16si)_mm512_undefined_epi32(), \ 3756309124Sdim (__mmask16)-1, (int)(R)); }) 3757309124Sdim 3758309124Sdim#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \ 3759309124Sdim (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ 3760309124Sdim (__v16si)(__m512i)(W), \ 3761309124Sdim (__mmask16)(U), (int)(R)); }) 3762309124Sdim 3763309124Sdim#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \ 3764309124Sdim (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ 3765309124Sdim (__v16si)_mm512_setzero_si512(), \ 3766309124Sdim (__mmask16)(U), (int)(R)); }) 3767309124Sdim 3768309124Sdim 3769288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 3770277325Sdim_mm512_cvttps_epu32(__m512 __A) 3771277325Sdim{ 3772277325Sdim return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 3773277325Sdim (__v16si) 3774277325Sdim _mm512_setzero_si512 (), 3775277325Sdim (__mmask16) -1, 3776277325Sdim _MM_FROUND_CUR_DIRECTION); 3777277325Sdim} 3778277325Sdim 3779309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 3780309124Sdim_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) 3781309124Sdim{ 3782309124Sdim return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 
3783309124Sdim (__v16si) __W, 3784309124Sdim (__mmask16) __U, 3785309124Sdim _MM_FROUND_CUR_DIRECTION); 3786309124Sdim} 3787309124Sdim 3788309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 3789309124Sdim_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) 3790309124Sdim{ 3791309124Sdim return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 3792309124Sdim (__v16si) _mm512_setzero_si512 (), 3793309124Sdim (__mmask16) __U, 3794309124Sdim _MM_FROUND_CUR_DIRECTION); 3795309124Sdim} 3796309124Sdim 3797288943Sdim#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \ 3798309124Sdim (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 3799288943Sdim (__v16sf)_mm512_setzero_ps(), \ 3800309124Sdim (__mmask16)-1, (int)(R)); }) 3801277325Sdim 3802309124Sdim#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \ 3803309124Sdim (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 3804309124Sdim (__v16sf)(__m512)(W), \ 3805309124Sdim (__mmask16)(U), (int)(R)); }) 3806309124Sdim 3807309124Sdim#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \ 3808309124Sdim (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 3809309124Sdim (__v16sf)_mm512_setzero_ps(), \ 3810309124Sdim (__mmask16)(U), (int)(R)); }) 3811309124Sdim 3812288943Sdim#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \ 3813309124Sdim (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 3814288943Sdim (__v16sf)_mm512_setzero_ps(), \ 3815309124Sdim (__mmask16)-1, (int)(R)); }) 3816277325Sdim 3817309124Sdim#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \ 3818309124Sdim (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 3819309124Sdim (__v16sf)(__m512)(W), \ 3820309124Sdim (__mmask16)(U), (int)(R)); }) 3821309124Sdim 3822309124Sdim#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \ 3823309124Sdim (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 
3824309124Sdim (__v16sf)_mm512_setzero_ps(), \ 3825309124Sdim (__mmask16)(U), (int)(R)); }) 3826309124Sdim 3827309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3828309124Sdim_mm512_cvtepu32_ps (__m512i __A) 3829309124Sdim{ 3830309124Sdim return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 3831309124Sdim (__v16sf) _mm512_undefined_ps (), 3832309124Sdim (__mmask16) -1, 3833309124Sdim _MM_FROUND_CUR_DIRECTION); 3834309124Sdim} 3835309124Sdim 3836309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3837309124Sdim_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) 3838309124Sdim{ 3839309124Sdim return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 3840309124Sdim (__v16sf) __W, 3841309124Sdim (__mmask16) __U, 3842309124Sdim _MM_FROUND_CUR_DIRECTION); 3843309124Sdim} 3844309124Sdim 3845309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3846309124Sdim_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) 3847309124Sdim{ 3848309124Sdim return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 3849309124Sdim (__v16sf) _mm512_setzero_ps (), 3850309124Sdim (__mmask16) __U, 3851309124Sdim _MM_FROUND_CUR_DIRECTION); 3852309124Sdim} 3853309124Sdim 3854288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 3855277325Sdim_mm512_cvtepi32_pd(__m256i __A) 3856277325Sdim{ 3857314564Sdim return (__m512d)__builtin_convertvector((__v8si)__A, __v8df); 3858277325Sdim} 3859277325Sdim 3860309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3861309124Sdim_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) 3862309124Sdim{ 3863314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 3864314564Sdim (__v8df)_mm512_cvtepi32_pd(__A), 3865314564Sdim (__v8df)__W); 3866309124Sdim} 3867309124Sdim 3868309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3869309124Sdim_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) 3870309124Sdim{ 3871314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 3872314564Sdim 
(__v8df)_mm512_cvtepi32_pd(__A), 3873314564Sdim (__v8df)_mm512_setzero_pd()); 3874309124Sdim} 3875309124Sdim 3876314564Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3877314564Sdim_mm512_cvtepi32lo_pd(__m512i __A) 3878314564Sdim{ 3879314564Sdim return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A)); 3880314564Sdim} 3881314564Sdim 3882314564Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3883314564Sdim_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) 3884314564Sdim{ 3885314564Sdim return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A)); 3886314564Sdim} 3887314564Sdim 3888309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3889309124Sdim_mm512_cvtepi32_ps (__m512i __A) 3890309124Sdim{ 3891309124Sdim return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 3892309124Sdim (__v16sf) _mm512_undefined_ps (), 3893309124Sdim (__mmask16) -1, 3894309124Sdim _MM_FROUND_CUR_DIRECTION); 3895309124Sdim} 3896309124Sdim 3897309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3898309124Sdim_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) 3899309124Sdim{ 3900309124Sdim return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 3901309124Sdim (__v16sf) __W, 3902309124Sdim (__mmask16) __U, 3903309124Sdim _MM_FROUND_CUR_DIRECTION); 3904309124Sdim} 3905309124Sdim 3906309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3907309124Sdim_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) 3908309124Sdim{ 3909309124Sdim return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 3910309124Sdim (__v16sf) _mm512_setzero_ps (), 3911309124Sdim (__mmask16) __U, 3912309124Sdim _MM_FROUND_CUR_DIRECTION); 3913309124Sdim} 3914309124Sdim 3915288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 3916277325Sdim_mm512_cvtepu32_pd(__m256i __A) 3917277325Sdim{ 3918314564Sdim return (__m512d)__builtin_convertvector((__v8su)__A, __v8df); 3919277325Sdim} 3920277325Sdim 3921309124Sdimstatic __inline__ __m512d 
__DEFAULT_FN_ATTRS 3922309124Sdim_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) 3923309124Sdim{ 3924314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 3925314564Sdim (__v8df)_mm512_cvtepu32_pd(__A), 3926314564Sdim (__v8df)__W); 3927309124Sdim} 3928309124Sdim 3929309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3930309124Sdim_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) 3931309124Sdim{ 3932314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 3933314564Sdim (__v8df)_mm512_cvtepu32_pd(__A), 3934314564Sdim (__v8df)_mm512_setzero_pd()); 3935309124Sdim} 3936309124Sdim 3937314564Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3938314564Sdim_mm512_cvtepu32lo_pd(__m512i __A) 3939314564Sdim{ 3940314564Sdim return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A)); 3941314564Sdim} 3942314564Sdim 3943314564Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3944314564Sdim_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) 3945314564Sdim{ 3946314564Sdim return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A)); 3947314564Sdim} 3948314564Sdim 3949288943Sdim#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \ 3950309124Sdim (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3951288943Sdim (__v8sf)_mm256_setzero_ps(), \ 3952309124Sdim (__mmask8)-1, (int)(R)); }) 3953277325Sdim 3954309124Sdim#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \ 3955309124Sdim (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3956309124Sdim (__v8sf)(__m256)(W), (__mmask8)(U), \ 3957309124Sdim (int)(R)); }) 3958309124Sdim 3959309124Sdim#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \ 3960309124Sdim (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3961309124Sdim (__v8sf)_mm256_setzero_ps(), \ 3962309124Sdim (__mmask8)(U), (int)(R)); }) 3963309124Sdim 3964309124Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS 
3965309124Sdim_mm512_cvtpd_ps (__m512d __A) 3966309124Sdim{ 3967309124Sdim return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 3968309124Sdim (__v8sf) _mm256_undefined_ps (), 3969309124Sdim (__mmask8) -1, 3970309124Sdim _MM_FROUND_CUR_DIRECTION); 3971309124Sdim} 3972309124Sdim 3973309124Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS 3974309124Sdim_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) 3975309124Sdim{ 3976309124Sdim return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 3977309124Sdim (__v8sf) __W, 3978309124Sdim (__mmask8) __U, 3979309124Sdim _MM_FROUND_CUR_DIRECTION); 3980309124Sdim} 3981309124Sdim 3982309124Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS 3983309124Sdim_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) 3984309124Sdim{ 3985309124Sdim return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 3986309124Sdim (__v8sf) _mm256_setzero_ps (), 3987309124Sdim (__mmask8) __U, 3988309124Sdim _MM_FROUND_CUR_DIRECTION); 3989309124Sdim} 3990309124Sdim 3991314564Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3992314564Sdim_mm512_cvtpd_pslo (__m512d __A) 3993314564Sdim{ 3994314564Sdim return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A), 3995314564Sdim (__v8sf) _mm256_setzero_ps (), 3996314564Sdim 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 3997314564Sdim} 3998314564Sdim 3999314564Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 4000314564Sdim_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A) 4001314564Sdim{ 4002314564Sdim return (__m512) __builtin_shufflevector ( 4003314564Sdim (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W), 4004314564Sdim __U, __A), 4005314564Sdim (__v8sf) _mm256_setzero_ps (), 4006314564Sdim 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 4007314564Sdim} 4008314564Sdim 4009309124Sdim#define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \ 4010309124Sdim (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 4011309124Sdim 
(__v16hi)_mm256_undefined_si256(), \ 4012309124Sdim (__mmask16)-1); }) 4013309124Sdim 4014309124Sdim#define _mm512_mask_cvt_roundps_ph(U, W, A, I) __extension__ ({ \ 4015309124Sdim (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 4016309124Sdim (__v16hi)(__m256i)(U), \ 4017309124Sdim (__mmask16)(W)); }) 4018309124Sdim 4019309124Sdim#define _mm512_maskz_cvt_roundps_ph(W, A, I) __extension__ ({ \ 4020309124Sdim (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 4021309124Sdim (__v16hi)_mm256_setzero_si256(), \ 4022309124Sdim (__mmask16)(W)); }) 4023309124Sdim 4024288943Sdim#define _mm512_cvtps_ph(A, I) __extension__ ({ \ 4025309124Sdim (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 4026288943Sdim (__v16hi)_mm256_setzero_si256(), \ 4027309124Sdim (__mmask16)-1); }) 4028288943Sdim 4029309124Sdim#define _mm512_mask_cvtps_ph(U, W, A, I) __extension__ ({ \ 4030309124Sdim (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 4031309124Sdim (__v16hi)(__m256i)(U), \ 4032309124Sdim (__mmask16)(W)); }) 4033309124Sdim 4034309124Sdim#define _mm512_maskz_cvtps_ph(W, A, I) __extension__ ({\ 4035309124Sdim (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 4036309124Sdim (__v16hi)_mm256_setzero_si256(), \ 4037309124Sdim (__mmask16)(W)); }) 4038309124Sdim 4039309124Sdim#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \ 4040309124Sdim (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 4041309124Sdim (__v16sf)_mm512_undefined_ps(), \ 4042309124Sdim (__mmask16)-1, (int)(R)); }) 4043309124Sdim 4044309124Sdim#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \ 4045309124Sdim (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 4046309124Sdim (__v16sf)(__m512)(W), \ 4047309124Sdim (__mmask16)(U), (int)(R)); }) 4048309124Sdim 4049309124Sdim#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \ 4050309124Sdim 
(__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 4051309124Sdim (__v16sf)_mm512_setzero_ps(), \ 4052309124Sdim (__mmask16)(U), (int)(R)); }) 4053309124Sdim 4054309124Sdim 4055288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 4056277325Sdim_mm512_cvtph_ps(__m256i __A) 4057277325Sdim{ 4058277325Sdim return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 4059277325Sdim (__v16sf) 4060277325Sdim _mm512_setzero_ps (), 4061277325Sdim (__mmask16) -1, 4062277325Sdim _MM_FROUND_CUR_DIRECTION); 4063277325Sdim} 4064277325Sdim 4065309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 4066309124Sdim_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) 4067277325Sdim{ 4068309124Sdim return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 4069309124Sdim (__v16sf) __W, 4070309124Sdim (__mmask16) __U, 4071309124Sdim _MM_FROUND_CUR_DIRECTION); 4072277325Sdim} 4073277325Sdim 4074309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 4075309124Sdim_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) 4076309124Sdim{ 4077309124Sdim return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 4078309124Sdim (__v16sf) _mm512_setzero_ps (), 4079309124Sdim (__mmask16) __U, 4080309124Sdim _MM_FROUND_CUR_DIRECTION); 4081309124Sdim} 4082309124Sdim 4083309124Sdim#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \ 4084309124Sdim (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 4085309124Sdim (__v8si)_mm256_setzero_si256(), \ 4086309124Sdim (__mmask8)-1, (int)(R)); }) 4087309124Sdim 4088309124Sdim#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \ 4089309124Sdim (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 4090309124Sdim (__v8si)(__m256i)(W), \ 4091309124Sdim (__mmask8)(U), (int)(R)); }) 4092309124Sdim 4093309124Sdim#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \ 4094309124Sdim (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 4095309124Sdim 
(__v8si)_mm256_setzero_si256(), \ 4096309124Sdim (__mmask8)(U), (int)(R)); }) 4097309124Sdim 4098288943Sdimstatic __inline __m256i __DEFAULT_FN_ATTRS 4099296417Sdim_mm512_cvttpd_epi32(__m512d __a) 4100277325Sdim{ 4101296417Sdim return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a, 4102277325Sdim (__v8si)_mm256_setzero_si256(), 4103277325Sdim (__mmask8) -1, 4104277325Sdim _MM_FROUND_CUR_DIRECTION); 4105277325Sdim} 4106277325Sdim 4107309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 4108309124Sdim_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) 4109309124Sdim{ 4110309124Sdim return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 4111309124Sdim (__v8si) __W, 4112309124Sdim (__mmask8) __U, 4113309124Sdim _MM_FROUND_CUR_DIRECTION); 4114309124Sdim} 4115277325Sdim 4116309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 4117309124Sdim_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) 4118309124Sdim{ 4119309124Sdim return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 4120309124Sdim (__v8si) _mm256_setzero_si256 (), 4121309124Sdim (__mmask8) __U, 4122309124Sdim _MM_FROUND_CUR_DIRECTION); 4123309124Sdim} 4124309124Sdim 4125288943Sdim#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \ 4126309124Sdim (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 4127288943Sdim (__v16si)_mm512_setzero_si512(), \ 4128309124Sdim (__mmask16)-1, (int)(R)); }) 4129277325Sdim 4130309124Sdim#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \ 4131309124Sdim (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 4132309124Sdim (__v16si)(__m512i)(W), \ 4133309124Sdim (__mmask16)(U), (int)(R)); }) 4134309124Sdim 4135309124Sdim#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \ 4136309124Sdim (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 4137309124Sdim (__v16si)_mm512_setzero_si512(), \ 4138309124Sdim (__mmask16)(U), (int)(R)); }) 4139309124Sdim 
4140309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4141309124Sdim_mm512_cvttps_epi32(__m512 __a) 4142309124Sdim{ 4143309124Sdim return (__m512i) 4144309124Sdim __builtin_ia32_cvttps2dq512_mask((__v16sf) __a, 4145309124Sdim (__v16si) _mm512_setzero_si512 (), 4146309124Sdim (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); 4147309124Sdim} 4148309124Sdim 4149309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4150309124Sdim_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) 4151309124Sdim{ 4152309124Sdim return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 4153309124Sdim (__v16si) __W, 4154309124Sdim (__mmask16) __U, 4155309124Sdim _MM_FROUND_CUR_DIRECTION); 4156309124Sdim} 4157309124Sdim 4158309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4159309124Sdim_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) 4160309124Sdim{ 4161309124Sdim return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 4162309124Sdim (__v16si) _mm512_setzero_si512 (), 4163309124Sdim (__mmask16) __U, 4164309124Sdim _MM_FROUND_CUR_DIRECTION); 4165309124Sdim} 4166309124Sdim 4167288943Sdim#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \ 4168309124Sdim (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 4169288943Sdim (__v16si)_mm512_setzero_si512(), \ 4170309124Sdim (__mmask16)-1, (int)(R)); }) 4171288943Sdim 4172309124Sdim#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \ 4173309124Sdim (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 4174309124Sdim (__v16si)(__m512i)(W), \ 4175309124Sdim (__mmask16)(U), (int)(R)); }) 4176309124Sdim 4177309124Sdim#define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \ 4178309124Sdim (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 4179309124Sdim (__v16si)_mm512_setzero_si512(), \ 4180309124Sdim (__mmask16)(U), (int)(R)); }) 4181309124Sdim 4182309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4183309124Sdim_mm512_cvtps_epi32 (__m512 
__A) 4184309124Sdim{ 4185309124Sdim return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 4186309124Sdim (__v16si) _mm512_undefined_epi32 (), 4187309124Sdim (__mmask16) -1, 4188309124Sdim _MM_FROUND_CUR_DIRECTION); 4189309124Sdim} 4190309124Sdim 4191309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4192309124Sdim_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) 4193309124Sdim{ 4194309124Sdim return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 4195309124Sdim (__v16si) __W, 4196309124Sdim (__mmask16) __U, 4197309124Sdim _MM_FROUND_CUR_DIRECTION); 4198309124Sdim} 4199309124Sdim 4200309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4201309124Sdim_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) 4202309124Sdim{ 4203309124Sdim return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 4204309124Sdim (__v16si) 4205309124Sdim _mm512_setzero_si512 (), 4206309124Sdim (__mmask16) __U, 4207309124Sdim _MM_FROUND_CUR_DIRECTION); 4208309124Sdim} 4209309124Sdim 4210288943Sdim#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \ 4211309124Sdim (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 4212288943Sdim (__v8si)_mm256_setzero_si256(), \ 4213309124Sdim (__mmask8)-1, (int)(R)); }) 4214288943Sdim 4215309124Sdim#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \ 4216309124Sdim (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 4217309124Sdim (__v8si)(__m256i)(W), \ 4218309124Sdim (__mmask8)(U), (int)(R)); }) 4219309124Sdim 4220309124Sdim#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \ 4221309124Sdim (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 4222309124Sdim (__v8si)_mm256_setzero_si256(), \ 4223309124Sdim (__mmask8)(U), (int)(R)); }) 4224309124Sdim 4225309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 4226309124Sdim_mm512_cvtpd_epi32 (__m512d __A) 4227309124Sdim{ 4228309124Sdim return (__m256i) __builtin_ia32_cvtpd2dq512_mask 
((__v8df) __A, 4229309124Sdim (__v8si) 4230309124Sdim _mm256_undefined_si256 (), 4231309124Sdim (__mmask8) -1, 4232309124Sdim _MM_FROUND_CUR_DIRECTION); 4233309124Sdim} 4234309124Sdim 4235309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 4236309124Sdim_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) 4237309124Sdim{ 4238309124Sdim return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 4239309124Sdim (__v8si) __W, 4240309124Sdim (__mmask8) __U, 4241309124Sdim _MM_FROUND_CUR_DIRECTION); 4242309124Sdim} 4243309124Sdim 4244309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 4245309124Sdim_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) 4246309124Sdim{ 4247309124Sdim return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 4248309124Sdim (__v8si) 4249309124Sdim _mm256_setzero_si256 (), 4250309124Sdim (__mmask8) __U, 4251309124Sdim _MM_FROUND_CUR_DIRECTION); 4252309124Sdim} 4253309124Sdim 4254288943Sdim#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \ 4255309124Sdim (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4256288943Sdim (__v16si)_mm512_setzero_si512(), \ 4257309124Sdim (__mmask16)-1, (int)(R)); }) 4258288943Sdim 4259309124Sdim#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \ 4260309124Sdim (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4261309124Sdim (__v16si)(__m512i)(W), \ 4262309124Sdim (__mmask16)(U), (int)(R)); }) 4263309124Sdim 4264309124Sdim#define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \ 4265309124Sdim (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4266309124Sdim (__v16si)_mm512_setzero_si512(), \ 4267309124Sdim (__mmask16)(U), (int)(R)); }) 4268309124Sdim 4269309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4270309124Sdim_mm512_cvtps_epu32 ( __m512 __A) 4271309124Sdim{ 4272309124Sdim return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\ 4273309124Sdim (__v16si)\ 4274309124Sdim 
_mm512_undefined_epi32 (),\ 4275309124Sdim (__mmask16) -1,\ 4276309124Sdim _MM_FROUND_CUR_DIRECTION);\ 4277309124Sdim} 4278309124Sdim 4279309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4280309124Sdim_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) 4281309124Sdim{ 4282309124Sdim return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 4283309124Sdim (__v16si) __W, 4284309124Sdim (__mmask16) __U, 4285309124Sdim _MM_FROUND_CUR_DIRECTION); 4286309124Sdim} 4287309124Sdim 4288309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4289309124Sdim_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A) 4290309124Sdim{ 4291309124Sdim return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 4292309124Sdim (__v16si) 4293309124Sdim _mm512_setzero_si512 (), 4294309124Sdim (__mmask16) __U , 4295309124Sdim _MM_FROUND_CUR_DIRECTION); 4296309124Sdim} 4297309124Sdim 4298288943Sdim#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \ 4299309124Sdim (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4300288943Sdim (__v8si)_mm256_setzero_si256(), \ 4301309124Sdim (__mmask8)-1, (int)(R)); }) 4302288943Sdim 4303309124Sdim#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \ 4304309124Sdim (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4305309124Sdim (__v8si)(W), \ 4306309124Sdim (__mmask8)(U), (int)(R)); }) 4307309124Sdim 4308309124Sdim#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \ 4309309124Sdim (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4310309124Sdim (__v8si)_mm256_setzero_si256(), \ 4311309124Sdim (__mmask8)(U), (int)(R)); }) 4312309124Sdim 4313309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 4314309124Sdim_mm512_cvtpd_epu32 (__m512d __A) 4315309124Sdim{ 4316309124Sdim return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4317309124Sdim (__v8si) 4318309124Sdim _mm256_undefined_si256 (), 4319309124Sdim (__mmask8) -1, 4320309124Sdim 
_MM_FROUND_CUR_DIRECTION); 4321309124Sdim} 4322309124Sdim 4323309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 4324309124Sdim_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 4325309124Sdim{ 4326309124Sdim return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4327309124Sdim (__v8si) __W, 4328309124Sdim (__mmask8) __U, 4329309124Sdim _MM_FROUND_CUR_DIRECTION); 4330309124Sdim} 4331309124Sdim 4332309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 4333309124Sdim_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A) 4334309124Sdim{ 4335309124Sdim return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4336309124Sdim (__v8si) 4337309124Sdim _mm256_setzero_si256 (), 4338309124Sdim (__mmask8) __U, 4339309124Sdim _MM_FROUND_CUR_DIRECTION); 4340309124Sdim} 4341309124Sdim 4342321369Sdimstatic __inline__ double __DEFAULT_FN_ATTRS 4343321369Sdim_mm512_cvtsd_f64(__m512d __a) 4344321369Sdim{ 4345321369Sdim return __a[0]; 4346321369Sdim} 4347321369Sdim 4348321369Sdimstatic __inline__ float __DEFAULT_FN_ATTRS 4349321369Sdim_mm512_cvtss_f32(__m512 __a) 4350321369Sdim{ 4351321369Sdim return __a[0]; 4352321369Sdim} 4353321369Sdim 4354277325Sdim/* Unpack and Interleave */ 4355309124Sdim 4356288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 4357277325Sdim_mm512_unpackhi_pd(__m512d __a, __m512d __b) 4358277325Sdim{ 4359309124Sdim return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 4360309124Sdim 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); 4361277325Sdim} 4362277325Sdim 4363309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 4364309124Sdim_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 4365309124Sdim{ 4366309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 4367309124Sdim (__v8df)_mm512_unpackhi_pd(__A, __B), 4368309124Sdim (__v8df)__W); 4369309124Sdim} 4370309124Sdim 4371309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 4372309124Sdim_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d 
__A, __m512d __B) 4373309124Sdim{ 4374309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 4375309124Sdim (__v8df)_mm512_unpackhi_pd(__A, __B), 4376309124Sdim (__v8df)_mm512_setzero_pd()); 4377309124Sdim} 4378309124Sdim 4379288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 4380277325Sdim_mm512_unpacklo_pd(__m512d __a, __m512d __b) 4381277325Sdim{ 4382309124Sdim return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 4383309124Sdim 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); 4384277325Sdim} 4385277325Sdim 4386309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 4387309124Sdim_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 4388309124Sdim{ 4389309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 4390309124Sdim (__v8df)_mm512_unpacklo_pd(__A, __B), 4391309124Sdim (__v8df)__W); 4392309124Sdim} 4393309124Sdim 4394309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 4395309124Sdim_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) 4396309124Sdim{ 4397309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 4398309124Sdim (__v8df)_mm512_unpacklo_pd(__A, __B), 4399309124Sdim (__v8df)_mm512_setzero_pd()); 4400309124Sdim} 4401309124Sdim 4402288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 4403277325Sdim_mm512_unpackhi_ps(__m512 __a, __m512 __b) 4404277325Sdim{ 4405309124Sdim return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, 4406309124Sdim 2, 18, 3, 19, 4407309124Sdim 2+4, 18+4, 3+4, 19+4, 4408309124Sdim 2+8, 18+8, 3+8, 19+8, 4409309124Sdim 2+12, 18+12, 3+12, 19+12); 4410277325Sdim} 4411277325Sdim 4412309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 4413309124Sdim_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 4414309124Sdim{ 4415309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, 4416309124Sdim (__v16sf)_mm512_unpackhi_ps(__A, __B), 4417309124Sdim (__v16sf)__W); 4418309124Sdim} 4419309124Sdim 
4420309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 4421309124Sdim_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) 4422309124Sdim{ 4423309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, 4424309124Sdim (__v16sf)_mm512_unpackhi_ps(__A, __B), 4425309124Sdim (__v16sf)_mm512_setzero_ps()); 4426309124Sdim} 4427309124Sdim 4428288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 4429277325Sdim_mm512_unpacklo_ps(__m512 __a, __m512 __b) 4430277325Sdim{ 4431309124Sdim return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, 4432309124Sdim 0, 16, 1, 17, 4433309124Sdim 0+4, 16+4, 1+4, 17+4, 4434309124Sdim 0+8, 16+8, 1+8, 17+8, 4435309124Sdim 0+12, 16+12, 1+12, 17+12); 4436277325Sdim} 4437277325Sdim 4438309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 4439309124Sdim_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 4440309124Sdim{ 4441309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, 4442309124Sdim (__v16sf)_mm512_unpacklo_ps(__A, __B), 4443309124Sdim (__v16sf)__W); 4444309124Sdim} 4445309124Sdim 4446309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 4447309124Sdim_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B) 4448309124Sdim{ 4449309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, 4450309124Sdim (__v16sf)_mm512_unpacklo_ps(__A, __B), 4451309124Sdim (__v16sf)_mm512_setzero_ps()); 4452309124Sdim} 4453309124Sdim 4454309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4455309124Sdim_mm512_unpackhi_epi32(__m512i __A, __m512i __B) 4456309124Sdim{ 4457309124Sdim return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, 4458309124Sdim 2, 18, 3, 19, 4459309124Sdim 2+4, 18+4, 3+4, 19+4, 4460309124Sdim 2+8, 18+8, 3+8, 19+8, 4461309124Sdim 2+12, 18+12, 3+12, 19+12); 4462309124Sdim} 4463309124Sdim 4464309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4465309124Sdim_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, 
__m512i __B) 4466309124Sdim{ 4467309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, 4468309124Sdim (__v16si)_mm512_unpackhi_epi32(__A, __B), 4469309124Sdim (__v16si)__W); 4470309124Sdim} 4471309124Sdim 4472309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4473309124Sdim_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B) 4474309124Sdim{ 4475309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, 4476309124Sdim (__v16si)_mm512_unpackhi_epi32(__A, __B), 4477309124Sdim (__v16si)_mm512_setzero_si512()); 4478309124Sdim} 4479309124Sdim 4480309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4481309124Sdim_mm512_unpacklo_epi32(__m512i __A, __m512i __B) 4482309124Sdim{ 4483309124Sdim return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, 4484309124Sdim 0, 16, 1, 17, 4485309124Sdim 0+4, 16+4, 1+4, 17+4, 4486309124Sdim 0+8, 16+8, 1+8, 17+8, 4487309124Sdim 0+12, 16+12, 1+12, 17+12); 4488309124Sdim} 4489309124Sdim 4490309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4491309124Sdim_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 4492309124Sdim{ 4493309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, 4494309124Sdim (__v16si)_mm512_unpacklo_epi32(__A, __B), 4495309124Sdim (__v16si)__W); 4496309124Sdim} 4497309124Sdim 4498309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4499309124Sdim_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B) 4500309124Sdim{ 4501309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, 4502309124Sdim (__v16si)_mm512_unpacklo_epi32(__A, __B), 4503309124Sdim (__v16si)_mm512_setzero_si512()); 4504309124Sdim} 4505309124Sdim 4506309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4507309124Sdim_mm512_unpackhi_epi64(__m512i __A, __m512i __B) 4508309124Sdim{ 4509309124Sdim return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, 4510309124Sdim 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); 
4511309124Sdim} 4512309124Sdim 4513309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4514309124Sdim_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 4515309124Sdim{ 4516309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, 4517309124Sdim (__v8di)_mm512_unpackhi_epi64(__A, __B), 4518309124Sdim (__v8di)__W); 4519309124Sdim} 4520309124Sdim 4521309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4522309124Sdim_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B) 4523309124Sdim{ 4524309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, 4525309124Sdim (__v8di)_mm512_unpackhi_epi64(__A, __B), 4526309124Sdim (__v8di)_mm512_setzero_si512()); 4527309124Sdim} 4528309124Sdim 4529309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4530309124Sdim_mm512_unpacklo_epi64 (__m512i __A, __m512i __B) 4531309124Sdim{ 4532309124Sdim return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, 4533309124Sdim 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); 4534309124Sdim} 4535309124Sdim 4536309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4537309124Sdim_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 4538309124Sdim{ 4539309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, 4540309124Sdim (__v8di)_mm512_unpacklo_epi64(__A, __B), 4541309124Sdim (__v8di)__W); 4542309124Sdim} 4543309124Sdim 4544309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4545309124Sdim_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 4546309124Sdim{ 4547309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, 4548309124Sdim (__v8di)_mm512_unpacklo_epi64(__A, __B), 4549309124Sdim (__v8di)_mm512_setzero_si512()); 4550309124Sdim} 4551309124Sdim 4552277325Sdim/* Bit Test */ 4553277325Sdim 4554288943Sdimstatic __inline __mmask16 __DEFAULT_FN_ATTRS 4555277325Sdim_mm512_test_epi32_mask(__m512i __A, __m512i __B) 4556277325Sdim{ 4557277325Sdim return 
(__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, 4558277325Sdim (__v16si) __B, 4559277325Sdim (__mmask16) -1); 4560277325Sdim} 4561277325Sdim 4562309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4563309124Sdim_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 4564309124Sdim{ 4565309124Sdim return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, 4566309124Sdim (__v16si) __B, __U); 4567309124Sdim} 4568309124Sdim 4569288943Sdimstatic __inline __mmask8 __DEFAULT_FN_ATTRS 4570277325Sdim_mm512_test_epi64_mask(__m512i __A, __m512i __B) 4571277325Sdim{ 4572277325Sdim return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, 4573277325Sdim (__v8di) __B, 4574277325Sdim (__mmask8) -1); 4575277325Sdim} 4576277325Sdim 4577309124Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4578309124Sdim_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 4579309124Sdim{ 4580309124Sdim return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U); 4581309124Sdim} 4582309124Sdim 4583309124Sdim 4584277325Sdim/* SIMD load ops */ 4585277325Sdim 4586288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4587309124Sdim_mm512_loadu_si512 (void const *__P) 4588309124Sdim{ 4589309124Sdim return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, 4590309124Sdim (__v16si) 4591309124Sdim _mm512_setzero_si512 (), 4592309124Sdim (__mmask16) -1); 4593309124Sdim} 4594309124Sdim 4595309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4596309124Sdim_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) 4597309124Sdim{ 4598309124Sdim return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, 4599309124Sdim (__v16si) __W, 4600309124Sdim (__mmask16) __U); 4601309124Sdim} 4602309124Sdim 4603309124Sdim 4604309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4605277325Sdim_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) 4606277325Sdim{ 4607309124Sdim return (__m512i) __builtin_ia32_loaddqusi512_mask 
((const int *)__P, 4608277325Sdim (__v16si) 4609277325Sdim _mm512_setzero_si512 (), 4610277325Sdim (__mmask16) __U); 4611277325Sdim} 4612277325Sdim 4613288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4614309124Sdim_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) 4615309124Sdim{ 4616309124Sdim return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P, 4617309124Sdim (__v8di) __W, 4618309124Sdim (__mmask8) __U); 4619309124Sdim} 4620309124Sdim 4621309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4622277325Sdim_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) 4623277325Sdim{ 4624309124Sdim return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P, 4625277325Sdim (__v8di) 4626277325Sdim _mm512_setzero_si512 (), 4627277325Sdim (__mmask8) __U); 4628277325Sdim} 4629277325Sdim 4630288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 4631309124Sdim_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P) 4632309124Sdim{ 4633309124Sdim return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P, 4634309124Sdim (__v16sf) __W, 4635309124Sdim (__mmask16) __U); 4636309124Sdim} 4637309124Sdim 4638309124Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 4639277325Sdim_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) 4640277325Sdim{ 4641309124Sdim return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P, 4642277325Sdim (__v16sf) 4643277325Sdim _mm512_setzero_ps (), 4644277325Sdim (__mmask16) __U); 4645277325Sdim} 4646277325Sdim 4647288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 4648309124Sdim_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P) 4649277325Sdim{ 4650309124Sdim return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P, 4651309124Sdim (__v8df) __W, 4652309124Sdim (__mmask8) __U); 4653277325Sdim} 4654277325Sdim 4655288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 4656309124Sdim_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) 4657288943Sdim{ 
4658309124Sdim return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P, 4659288943Sdim (__v8df) 4660288943Sdim _mm512_setzero_pd (), 4661288943Sdim (__mmask8) __U); 4662288943Sdim} 4663288943Sdim 4664288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 4665321369Sdim_mm512_loadu_pd(void const *__p) 4666277325Sdim{ 4667277325Sdim struct __loadu_pd { 4668277325Sdim __m512d __v; 4669288943Sdim } __attribute__((__packed__, __may_alias__)); 4670277325Sdim return ((struct __loadu_pd*)__p)->__v; 4671277325Sdim} 4672277325Sdim 4673288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 4674321369Sdim_mm512_loadu_ps(void const *__p) 4675277325Sdim{ 4676277325Sdim struct __loadu_ps { 4677277325Sdim __m512 __v; 4678288943Sdim } __attribute__((__packed__, __may_alias__)); 4679277325Sdim return ((struct __loadu_ps*)__p)->__v; 4680277325Sdim} 4681277325Sdim 4682288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 4683321369Sdim_mm512_load_ps(void const *__p) 4684288943Sdim{ 4685288943Sdim return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p, 4686288943Sdim (__v16sf) 4687288943Sdim _mm512_setzero_ps (), 4688288943Sdim (__mmask16) -1); 4689288943Sdim} 4690288943Sdim 4691309124Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 4692309124Sdim_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P) 4693309124Sdim{ 4694309124Sdim return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, 4695309124Sdim (__v16sf) __W, 4696309124Sdim (__mmask16) __U); 4697309124Sdim} 4698309124Sdim 4699309124Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 4700309124Sdim_mm512_maskz_load_ps(__mmask16 __U, void const *__P) 4701309124Sdim{ 4702309124Sdim return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P, 4703309124Sdim (__v16sf) 4704309124Sdim _mm512_setzero_ps (), 4705309124Sdim (__mmask16) __U); 4706309124Sdim} 4707309124Sdim 4708288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 4709321369Sdim_mm512_load_pd(void const *__p) 4710288943Sdim{ 
4711288943Sdim return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p, 4712288943Sdim (__v8df) 4713288943Sdim _mm512_setzero_pd (), 4714288943Sdim (__mmask8) -1); 4715288943Sdim} 4716288943Sdim 4717309124Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 4718309124Sdim_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P) 4719309124Sdim{ 4720309124Sdim return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, 4721309124Sdim (__v8df) __W, 4722309124Sdim (__mmask8) __U); 4723309124Sdim} 4724309124Sdim 4725309124Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 4726309124Sdim_mm512_maskz_load_pd(__mmask8 __U, void const *__P) 4727309124Sdim{ 4728309124Sdim return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P, 4729309124Sdim (__v8df) 4730309124Sdim _mm512_setzero_pd (), 4731309124Sdim (__mmask8) __U); 4732309124Sdim} 4733309124Sdim 4734309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4735309124Sdim_mm512_load_si512 (void const *__P) 4736309124Sdim{ 4737309124Sdim return *(__m512i *) __P; 4738309124Sdim} 4739309124Sdim 4740309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4741309124Sdim_mm512_load_epi32 (void const *__P) 4742309124Sdim{ 4743309124Sdim return *(__m512i *) __P; 4744309124Sdim} 4745309124Sdim 4746309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4747309124Sdim_mm512_load_epi64 (void const *__P) 4748309124Sdim{ 4749309124Sdim return *(__m512i *) __P; 4750309124Sdim} 4751309124Sdim 4752277325Sdim/* SIMD store ops */ 4753277325Sdim 4754288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4755277325Sdim_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) 4756277325Sdim{ 4757309124Sdim __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A, 4758277325Sdim (__mmask8) __U); 4759277325Sdim} 4760277325Sdim 4761288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4762309124Sdim_mm512_storeu_si512 (void *__P, __m512i __A) 4763309124Sdim{ 4764309124Sdim __builtin_ia32_storedqusi512_mask ((int *) 
__P, (__v16si) __A, 4765309124Sdim (__mmask16) -1); 4766309124Sdim} 4767309124Sdim 4768309124Sdimstatic __inline void __DEFAULT_FN_ATTRS 4769277325Sdim_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) 4770277325Sdim{ 4771309124Sdim __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A, 4772277325Sdim (__mmask16) __U); 4773277325Sdim} 4774277325Sdim 4775288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4776277325Sdim_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A) 4777277325Sdim{ 4778309124Sdim __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U); 4779277325Sdim} 4780277325Sdim 4781288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4782277325Sdim_mm512_storeu_pd(void *__P, __m512d __A) 4783277325Sdim{ 4784309124Sdim __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1); 4785277325Sdim} 4786277325Sdim 4787288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4788277325Sdim_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A) 4789277325Sdim{ 4790309124Sdim __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A, 4791277325Sdim (__mmask16) __U); 4792277325Sdim} 4793277325Sdim 4794288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4795277325Sdim_mm512_storeu_ps(void *__P, __m512 __A) 4796277325Sdim{ 4797309124Sdim __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1); 4798277325Sdim} 4799277325Sdim 4800288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4801288943Sdim_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) 4802277325Sdim{ 4803288943Sdim __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); 4804277325Sdim} 4805277325Sdim 4806288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4807277325Sdim_mm512_store_pd(void *__P, __m512d __A) 4808277325Sdim{ 4809277325Sdim *(__m512d*)__P = __A; 4810277325Sdim} 4811277325Sdim 4812288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4813288943Sdim_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 
__A) 4814288943Sdim{ 4815288943Sdim __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A, 4816288943Sdim (__mmask16) __U); 4817288943Sdim} 4818288943Sdim 4819288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4820288943Sdim_mm512_store_ps(void *__P, __m512 __A) 4821288943Sdim{ 4822288943Sdim *(__m512*)__P = __A; 4823288943Sdim} 4824288943Sdim 4825309124Sdimstatic __inline void __DEFAULT_FN_ATTRS 4826309124Sdim_mm512_store_si512 (void *__P, __m512i __A) 4827309124Sdim{ 4828309124Sdim *(__m512i *) __P = __A; 4829309124Sdim} 4830309124Sdim 4831309124Sdimstatic __inline void __DEFAULT_FN_ATTRS 4832309124Sdim_mm512_store_epi32 (void *__P, __m512i __A) 4833309124Sdim{ 4834309124Sdim *(__m512i *) __P = __A; 4835309124Sdim} 4836309124Sdim 4837309124Sdimstatic __inline void __DEFAULT_FN_ATTRS 4838309124Sdim_mm512_store_epi64 (void *__P, __m512i __A) 4839309124Sdim{ 4840309124Sdim *(__m512i *) __P = __A; 4841309124Sdim} 4842309124Sdim 4843277325Sdim/* Mask ops */ 4844277325Sdim 4845288943Sdimstatic __inline __mmask16 __DEFAULT_FN_ATTRS 4846277325Sdim_mm512_knot(__mmask16 __M) 4847277325Sdim{ 4848277325Sdim return __builtin_ia32_knothi(__M); 4849277325Sdim} 4850277325Sdim 4851277325Sdim/* Integer compare */ 4852277325Sdim 4853288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4854277325Sdim_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) { 4855277325Sdim return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, 4856277325Sdim (__mmask16)-1); 4857277325Sdim} 4858277325Sdim 4859288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4860277325Sdim_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4861277325Sdim return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, 4862277325Sdim __u); 4863277325Sdim} 4864277325Sdim 4865288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4866288943Sdim_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) { 4867288943Sdim return 
(__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, 4868288943Sdim (__mmask16)-1); 4869288943Sdim} 4870288943Sdim 4871288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4872288943Sdim_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4873288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, 4874288943Sdim __u); 4875288943Sdim} 4876288943Sdim 4877288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4878277325Sdim_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4879277325Sdim return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, 4880277325Sdim __u); 4881277325Sdim} 4882277325Sdim 4883288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4884277325Sdim_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) { 4885277325Sdim return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, 4886277325Sdim (__mmask8)-1); 4887277325Sdim} 4888277325Sdim 4889288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4890288943Sdim_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) { 4891288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, 4892288943Sdim (__mmask8)-1); 4893288943Sdim} 4894288943Sdim 4895288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4896288943Sdim_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4897288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, 4898288943Sdim __u); 4899288943Sdim} 4900288943Sdim 4901288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4902288943Sdim_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) { 4903288943Sdim return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, 4904288943Sdim (__mmask16)-1); 4905288943Sdim} 4906288943Sdim 4907288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4908288943Sdim_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4909288943Sdim 
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, 4910288943Sdim __u); 4911288943Sdim} 4912288943Sdim 4913288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4914288943Sdim_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) { 4915288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, 4916288943Sdim (__mmask16)-1); 4917288943Sdim} 4918288943Sdim 4919288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4920288943Sdim_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4921288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, 4922288943Sdim __u); 4923288943Sdim} 4924288943Sdim 4925288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4926288943Sdim_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) { 4927288943Sdim return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, 4928288943Sdim (__mmask8)-1); 4929288943Sdim} 4930288943Sdim 4931288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4932288943Sdim_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4933288943Sdim return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, 4934288943Sdim __u); 4935288943Sdim} 4936288943Sdim 4937288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4938288943Sdim_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) { 4939288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, 4940288943Sdim (__mmask8)-1); 4941288943Sdim} 4942288943Sdim 4943288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4944288943Sdim_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4945288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, 4946288943Sdim __u); 4947288943Sdim} 4948288943Sdim 4949288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4950288943Sdim_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) { 4951288943Sdim return 
(__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, 4952288943Sdim (__mmask16)-1); 4953288943Sdim} 4954288943Sdim 4955288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4956288943Sdim_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4957288943Sdim return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, 4958288943Sdim __u); 4959288943Sdim} 4960288943Sdim 4961288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4962288943Sdim_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) { 4963288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, 4964288943Sdim (__mmask16)-1); 4965288943Sdim} 4966288943Sdim 4967288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4968288943Sdim_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4969288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, 4970288943Sdim __u); 4971288943Sdim} 4972288943Sdim 4973288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4974288943Sdim_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4975288943Sdim return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b, 4976288943Sdim __u); 4977288943Sdim} 4978288943Sdim 4979288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4980288943Sdim_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) { 4981288943Sdim return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b, 4982288943Sdim (__mmask8)-1); 4983288943Sdim} 4984288943Sdim 4985288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4986288943Sdim_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) { 4987288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, 4988288943Sdim (__mmask8)-1); 4989288943Sdim} 4990288943Sdim 4991288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4992288943Sdim_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4993288943Sdim 
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, 4994288943Sdim __u); 4995288943Sdim} 4996288943Sdim 4997288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4998288943Sdim_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) { 4999288943Sdim return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, 5000288943Sdim (__mmask16)-1); 5001288943Sdim} 5002288943Sdim 5003288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 5004288943Sdim_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 5005288943Sdim return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, 5006288943Sdim __u); 5007288943Sdim} 5008288943Sdim 5009288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 5010288943Sdim_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) { 5011288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, 5012288943Sdim (__mmask16)-1); 5013288943Sdim} 5014288943Sdim 5015288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 5016288943Sdim_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 5017288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, 5018288943Sdim __u); 5019288943Sdim} 5020288943Sdim 5021288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 5022288943Sdim_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) { 5023288943Sdim return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, 5024288943Sdim (__mmask8)-1); 5025288943Sdim} 5026288943Sdim 5027288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 5028288943Sdim_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 5029288943Sdim return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, 5030288943Sdim __u); 5031288943Sdim} 5032288943Sdim 5033288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 5034288943Sdim_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) { 5035288943Sdim return 
(__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, 5036288943Sdim (__mmask8)-1); 5037288943Sdim} 5038288943Sdim 5039288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 5040288943Sdim_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 5041288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, 5042288943Sdim __u); 5043288943Sdim} 5044288943Sdim 5045288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 5046288943Sdim_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) { 5047288943Sdim return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, 5048288943Sdim (__mmask16)-1); 5049288943Sdim} 5050288943Sdim 5051288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 5052288943Sdim_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 5053288943Sdim return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, 5054288943Sdim __u); 5055288943Sdim} 5056288943Sdim 5057288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 5058288943Sdim_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) { 5059288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, 5060288943Sdim (__mmask16)-1); 5061288943Sdim} 5062288943Sdim 5063288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 5064288943Sdim_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 5065288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, 5066288943Sdim __u); 5067288943Sdim} 5068288943Sdim 5069288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 5070288943Sdim_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) { 5071288943Sdim return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, 5072288943Sdim (__mmask8)-1); 5073288943Sdim} 5074288943Sdim 5075288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 5076288943Sdim_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 5077288943Sdim 
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, 5078288943Sdim __u); 5079288943Sdim} 5080288943Sdim 5081288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 5082288943Sdim_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) { 5083288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, 5084288943Sdim (__mmask8)-1); 5085288943Sdim} 5086288943Sdim 5087288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 5088288943Sdim_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 5089288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, 5090288943Sdim __u); 5091288943Sdim} 5092288943Sdim 5093288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 5094288943Sdim_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) { 5095288943Sdim return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, 5096288943Sdim (__mmask16)-1); 5097288943Sdim} 5098288943Sdim 5099288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 5100288943Sdim_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 5101288943Sdim return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, 5102288943Sdim __u); 5103288943Sdim} 5104288943Sdim 5105288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 5106288943Sdim_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) { 5107288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, 5108288943Sdim (__mmask16)-1); 5109288943Sdim} 5110288943Sdim 5111288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 5112288943Sdim_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 5113288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, 5114288943Sdim __u); 5115288943Sdim} 5116288943Sdim 5117288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 5118288943Sdim_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) { 5119288943Sdim return 
(__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, 5120288943Sdim (__mmask8)-1); 5121288943Sdim} 5122288943Sdim 5123288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 5124288943Sdim_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 5125288943Sdim return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, 5126288943Sdim __u); 5127288943Sdim} 5128288943Sdim 5129288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 5130288943Sdim_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) { 5131288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, 5132288943Sdim (__mmask8)-1); 5133288943Sdim} 5134288943Sdim 5135288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 5136288943Sdim_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 5137288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, 5138288943Sdim __u); 5139288943Sdim} 5140288943Sdim 5141309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5142314564Sdim_mm512_cvtepi8_epi32(__m128i __A) 5143309124Sdim{ 5144314564Sdim /* This function always performs a signed extension, but __v16qi is a char 5145314564Sdim which may be signed or unsigned, so use __v16qs. 
*/ 5146314564Sdim return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si); 5147309124Sdim} 5148309124Sdim 5149309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5150314564Sdim_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) 5151309124Sdim{ 5152314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5153314564Sdim (__v16si)_mm512_cvtepi8_epi32(__A), 5154314564Sdim (__v16si)__W); 5155309124Sdim} 5156309124Sdim 5157309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5158314564Sdim_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A) 5159309124Sdim{ 5160314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5161314564Sdim (__v16si)_mm512_cvtepi8_epi32(__A), 5162314564Sdim (__v16si)_mm512_setzero_si512()); 5163309124Sdim} 5164309124Sdim 5165309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5166314564Sdim_mm512_cvtepi8_epi64(__m128i __A) 5167309124Sdim{ 5168314564Sdim /* This function always performs a signed extension, but __v16qi is a char 5169314564Sdim which may be signed or unsigned, so use __v16qs. 
*/ 5170314564Sdim return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); 5171309124Sdim} 5172309124Sdim 5173309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5174314564Sdim_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) 5175309124Sdim{ 5176314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5177314564Sdim (__v8di)_mm512_cvtepi8_epi64(__A), 5178314564Sdim (__v8di)__W); 5179309124Sdim} 5180309124Sdim 5181309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5182314564Sdim_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 5183309124Sdim{ 5184314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5185314564Sdim (__v8di)_mm512_cvtepi8_epi64(__A), 5186314564Sdim (__v8di)_mm512_setzero_si512 ()); 5187309124Sdim} 5188309124Sdim 5189309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5190314564Sdim_mm512_cvtepi32_epi64(__m256i __X) 5191309124Sdim{ 5192314564Sdim return (__m512i)__builtin_convertvector((__v8si)__X, __v8di); 5193309124Sdim} 5194309124Sdim 5195309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5196314564Sdim_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) 5197309124Sdim{ 5198314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5199314564Sdim (__v8di)_mm512_cvtepi32_epi64(__X), 5200314564Sdim (__v8di)__W); 5201309124Sdim} 5202309124Sdim 5203309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5204314564Sdim_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X) 5205309124Sdim{ 5206314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5207314564Sdim (__v8di)_mm512_cvtepi32_epi64(__X), 5208314564Sdim (__v8di)_mm512_setzero_si512()); 5209309124Sdim} 5210309124Sdim 5211309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5212314564Sdim_mm512_cvtepi16_epi32(__m256i __A) 5213309124Sdim{ 5214314564Sdim return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si); 5215309124Sdim} 
5216309124Sdim 5217309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5218314564Sdim_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) 5219309124Sdim{ 5220314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5221314564Sdim (__v16si)_mm512_cvtepi16_epi32(__A), 5222314564Sdim (__v16si)__W); 5223309124Sdim} 5224309124Sdim 5225309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5226314564Sdim_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A) 5227309124Sdim{ 5228314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5229314564Sdim (__v16si)_mm512_cvtepi16_epi32(__A), 5230314564Sdim (__v16si)_mm512_setzero_si512 ()); 5231309124Sdim} 5232309124Sdim 5233309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5234314564Sdim_mm512_cvtepi16_epi64(__m128i __A) 5235309124Sdim{ 5236314564Sdim return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di); 5237309124Sdim} 5238309124Sdim 5239309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5240314564Sdim_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) 5241309124Sdim{ 5242314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5243314564Sdim (__v8di)_mm512_cvtepi16_epi64(__A), 5244314564Sdim (__v8di)__W); 5245309124Sdim} 5246309124Sdim 5247309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5248314564Sdim_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 5249309124Sdim{ 5250314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5251314564Sdim (__v8di)_mm512_cvtepi16_epi64(__A), 5252314564Sdim (__v8di)_mm512_setzero_si512()); 5253309124Sdim} 5254309124Sdim 5255309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5256314564Sdim_mm512_cvtepu8_epi32(__m128i __A) 5257309124Sdim{ 5258314564Sdim return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si); 5259309124Sdim} 5260309124Sdim 5261309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5262314564Sdim_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i 
__A) 5263309124Sdim{ 5264314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5265314564Sdim (__v16si)_mm512_cvtepu8_epi32(__A), 5266314564Sdim (__v16si)__W); 5267309124Sdim} 5268309124Sdim 5269309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5270314564Sdim_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A) 5271309124Sdim{ 5272314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5273314564Sdim (__v16si)_mm512_cvtepu8_epi32(__A), 5274314564Sdim (__v16si)_mm512_setzero_si512()); 5275309124Sdim} 5276309124Sdim 5277309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5278314564Sdim_mm512_cvtepu8_epi64(__m128i __A) 5279309124Sdim{ 5280314564Sdim return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); 5281309124Sdim} 5282309124Sdim 5283309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5284314564Sdim_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) 5285309124Sdim{ 5286314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5287314564Sdim (__v8di)_mm512_cvtepu8_epi64(__A), 5288314564Sdim (__v8di)__W); 5289309124Sdim} 5290309124Sdim 5291309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5292314564Sdim_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) 5293309124Sdim{ 5294314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5295314564Sdim (__v8di)_mm512_cvtepu8_epi64(__A), 5296314564Sdim (__v8di)_mm512_setzero_si512()); 5297309124Sdim} 5298309124Sdim 5299309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5300314564Sdim_mm512_cvtepu32_epi64(__m256i __X) 5301309124Sdim{ 5302314564Sdim return (__m512i)__builtin_convertvector((__v8su)__X, __v8di); 5303309124Sdim} 5304309124Sdim 5305309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5306314564Sdim_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) 5307309124Sdim{ 5308314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 
5309314564Sdim (__v8di)_mm512_cvtepu32_epi64(__X), 5310314564Sdim (__v8di)__W); 5311309124Sdim} 5312309124Sdim 5313309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5314314564Sdim_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X) 5315309124Sdim{ 5316314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5317314564Sdim (__v8di)_mm512_cvtepu32_epi64(__X), 5318314564Sdim (__v8di)_mm512_setzero_si512()); 5319309124Sdim} 5320309124Sdim 5321309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5322314564Sdim_mm512_cvtepu16_epi32(__m256i __A) 5323309124Sdim{ 5324314564Sdim return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si); 5325309124Sdim} 5326309124Sdim 5327309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5328314564Sdim_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) 5329309124Sdim{ 5330314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5331314564Sdim (__v16si)_mm512_cvtepu16_epi32(__A), 5332314564Sdim (__v16si)__W); 5333309124Sdim} 5334309124Sdim 5335309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5336314564Sdim_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A) 5337309124Sdim{ 5338314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5339314564Sdim (__v16si)_mm512_cvtepu16_epi32(__A), 5340314564Sdim (__v16si)_mm512_setzero_si512()); 5341309124Sdim} 5342309124Sdim 5343309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5344314564Sdim_mm512_cvtepu16_epi64(__m128i __A) 5345309124Sdim{ 5346314564Sdim return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di); 5347309124Sdim} 5348309124Sdim 5349309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5350314564Sdim_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) 5351309124Sdim{ 5352314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5353314564Sdim (__v8di)_mm512_cvtepu16_epi64(__A), 5354314564Sdim (__v8di)__W); 5355309124Sdim} 5356309124Sdim 5357309124Sdimstatic __inline__ __m512i 
__DEFAULT_FN_ATTRS 5358314564Sdim_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 5359309124Sdim{ 5360314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5361314564Sdim (__v8di)_mm512_cvtepu16_epi64(__A), 5362314564Sdim (__v8di)_mm512_setzero_si512()); 5363309124Sdim} 5364309124Sdim 5365309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5366309124Sdim_mm512_rorv_epi32 (__m512i __A, __m512i __B) 5367309124Sdim{ 5368309124Sdim return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, 5369309124Sdim (__v16si) __B, 5370309124Sdim (__v16si) 5371309124Sdim _mm512_setzero_si512 (), 5372309124Sdim (__mmask16) -1); 5373309124Sdim} 5374309124Sdim 5375309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5376309124Sdim_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 5377309124Sdim{ 5378309124Sdim return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, 5379309124Sdim (__v16si) __B, 5380309124Sdim (__v16si) __W, 5381309124Sdim (__mmask16) __U); 5382309124Sdim} 5383309124Sdim 5384309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5385309124Sdim_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 5386309124Sdim{ 5387309124Sdim return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, 5388309124Sdim (__v16si) __B, 5389309124Sdim (__v16si) 5390309124Sdim _mm512_setzero_si512 (), 5391309124Sdim (__mmask16) __U); 5392309124Sdim} 5393309124Sdim 5394309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5395309124Sdim_mm512_rorv_epi64 (__m512i __A, __m512i __B) 5396309124Sdim{ 5397309124Sdim return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, 5398309124Sdim (__v8di) __B, 5399309124Sdim (__v8di) 5400309124Sdim _mm512_setzero_si512 (), 5401309124Sdim (__mmask8) -1); 5402309124Sdim} 5403309124Sdim 5404309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5405309124Sdim_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 5406309124Sdim{ 5407309124Sdim return 
(__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, 5408309124Sdim (__v8di) __B, 5409309124Sdim (__v8di) __W, 5410309124Sdim (__mmask8) __U); 5411309124Sdim} 5412309124Sdim 5413309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5414309124Sdim_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 5415309124Sdim{ 5416309124Sdim return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, 5417309124Sdim (__v8di) __B, 5418309124Sdim (__v8di) 5419309124Sdim _mm512_setzero_si512 (), 5420309124Sdim (__mmask8) __U); 5421309124Sdim} 5422309124Sdim 5423309124Sdim 5424309124Sdim 5425288943Sdim#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \ 5426296417Sdim (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 5427309124Sdim (__v16si)(__m512i)(b), (int)(p), \ 5428288943Sdim (__mmask16)-1); }) 5429288943Sdim 5430288943Sdim#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \ 5431296417Sdim (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 5432309124Sdim (__v16si)(__m512i)(b), (int)(p), \ 5433288943Sdim (__mmask16)-1); }) 5434288943Sdim 5435288943Sdim#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \ 5436296417Sdim (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 5437309124Sdim (__v8di)(__m512i)(b), (int)(p), \ 5438288943Sdim (__mmask8)-1); }) 5439288943Sdim 5440288943Sdim#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \ 5441296417Sdim (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 5442309124Sdim (__v8di)(__m512i)(b), (int)(p), \ 5443288943Sdim (__mmask8)-1); }) 5444288943Sdim 5445288943Sdim#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ 5446296417Sdim (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 5447309124Sdim (__v16si)(__m512i)(b), (int)(p), \ 5448288943Sdim (__mmask16)(m)); }) 5449288943Sdim 5450288943Sdim#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ 5451296417Sdim 
(__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 5452309124Sdim (__v16si)(__m512i)(b), (int)(p), \ 5453288943Sdim (__mmask16)(m)); }) 5454288943Sdim 5455288943Sdim#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ 5456296417Sdim (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 5457309124Sdim (__v8di)(__m512i)(b), (int)(p), \ 5458288943Sdim (__mmask8)(m)); }) 5459288943Sdim 5460288943Sdim#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ 5461296417Sdim (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 5462309124Sdim (__v8di)(__m512i)(b), (int)(p), \ 5463288943Sdim (__mmask8)(m)); }) 5464288943Sdim 5465309124Sdim#define _mm512_rol_epi32(a, b) __extension__ ({ \ 5466309124Sdim (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ 5467309124Sdim (__v16si)_mm512_setzero_si512(), \ 5468309124Sdim (__mmask16)-1); }) 5469309124Sdim 5470309124Sdim#define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \ 5471309124Sdim (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ 5472309124Sdim (__v16si)(__m512i)(W), \ 5473309124Sdim (__mmask16)(U)); }) 5474309124Sdim 5475309124Sdim#define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \ 5476309124Sdim (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ 5477309124Sdim (__v16si)_mm512_setzero_si512(), \ 5478309124Sdim (__mmask16)(U)); }) 5479309124Sdim 5480309124Sdim#define _mm512_rol_epi64(a, b) __extension__ ({ \ 5481309124Sdim (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ 5482309124Sdim (__v8di)_mm512_setzero_si512(), \ 5483309124Sdim (__mmask8)-1); }) 5484309124Sdim 5485309124Sdim#define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \ 5486309124Sdim (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ 5487309124Sdim (__v8di)(__m512i)(W), (__mmask8)(U)); }) 5488309124Sdim 5489309124Sdim#define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \ 
5490309124Sdim (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ 5491309124Sdim (__v8di)_mm512_setzero_si512(), \ 5492309124Sdim (__mmask8)(U)); }) 5493309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5494309124Sdim_mm512_rolv_epi32 (__m512i __A, __m512i __B) 5495309124Sdim{ 5496309124Sdim return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, 5497309124Sdim (__v16si) __B, 5498309124Sdim (__v16si) 5499309124Sdim _mm512_setzero_si512 (), 5500309124Sdim (__mmask16) -1); 5501309124Sdim} 5502309124Sdim 5503309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5504309124Sdim_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 5505309124Sdim{ 5506309124Sdim return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, 5507309124Sdim (__v16si) __B, 5508309124Sdim (__v16si) __W, 5509309124Sdim (__mmask16) __U); 5510309124Sdim} 5511309124Sdim 5512309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5513309124Sdim_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 5514309124Sdim{ 5515309124Sdim return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, 5516309124Sdim (__v16si) __B, 5517309124Sdim (__v16si) 5518309124Sdim _mm512_setzero_si512 (), 5519309124Sdim (__mmask16) __U); 5520309124Sdim} 5521309124Sdim 5522309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5523309124Sdim_mm512_rolv_epi64 (__m512i __A, __m512i __B) 5524309124Sdim{ 5525309124Sdim return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, 5526309124Sdim (__v8di) __B, 5527309124Sdim (__v8di) 5528309124Sdim _mm512_setzero_si512 (), 5529309124Sdim (__mmask8) -1); 5530309124Sdim} 5531309124Sdim 5532309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5533309124Sdim_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 5534309124Sdim{ 5535309124Sdim return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, 5536309124Sdim (__v8di) __B, 5537309124Sdim (__v8di) __W, 5538309124Sdim (__mmask8) __U); 
5539309124Sdim} 5540309124Sdim 5541309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5542309124Sdim_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 5543309124Sdim{ 5544309124Sdim return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, 5545309124Sdim (__v8di) __B, 5546309124Sdim (__v8di) 5547309124Sdim _mm512_setzero_si512 (), 5548309124Sdim (__mmask8) __U); 5549309124Sdim} 5550309124Sdim 5551309124Sdim#define _mm512_ror_epi32(A, B) __extension__ ({ \ 5552309124Sdim (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ 5553309124Sdim (__v16si)_mm512_setzero_si512(), \ 5554309124Sdim (__mmask16)-1); }) 5555309124Sdim 5556309124Sdim#define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \ 5557309124Sdim (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ 5558309124Sdim (__v16si)(__m512i)(W), \ 5559309124Sdim (__mmask16)(U)); }) 5560309124Sdim 5561309124Sdim#define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \ 5562309124Sdim (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ 5563309124Sdim (__v16si)_mm512_setzero_si512(), \ 5564309124Sdim (__mmask16)(U)); }) 5565309124Sdim 5566309124Sdim#define _mm512_ror_epi64(A, B) __extension__ ({ \ 5567309124Sdim (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ 5568309124Sdim (__v8di)_mm512_setzero_si512(), \ 5569309124Sdim (__mmask8)-1); }) 5570309124Sdim 5571309124Sdim#define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \ 5572309124Sdim (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ 5573309124Sdim (__v8di)(__m512i)(W), (__mmask8)(U)); }) 5574309124Sdim 5575309124Sdim#define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \ 5576309124Sdim (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ 5577309124Sdim (__v8di)_mm512_setzero_si512(), \ 5578309124Sdim (__mmask8)(U)); }) 5579309124Sdim 5580314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 
5581314564Sdim_mm512_slli_epi32(__m512i __A, int __B) 5582314564Sdim{ 5583314564Sdim return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B); 5584314564Sdim} 5585309124Sdim 5586314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5587314564Sdim_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) 5588314564Sdim{ 5589314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5590314564Sdim (__v16si)_mm512_slli_epi32(__A, __B), 5591314564Sdim (__v16si)__W); 5592314564Sdim} 5593309124Sdim 5594314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5595314564Sdim_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) { 5596314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5597314564Sdim (__v16si)_mm512_slli_epi32(__A, __B), 5598314564Sdim (__v16si)_mm512_setzero_si512()); 5599314564Sdim} 5600309124Sdim 5601314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5602314564Sdim_mm512_slli_epi64(__m512i __A, int __B) 5603314564Sdim{ 5604314564Sdim return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B); 5605314564Sdim} 5606309124Sdim 5607314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5608314564Sdim_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) 5609314564Sdim{ 5610314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5611314564Sdim (__v8di)_mm512_slli_epi64(__A, __B), 5612314564Sdim (__v8di)__W); 5613314564Sdim} 5614309124Sdim 5615314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5616314564Sdim_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B) 5617314564Sdim{ 5618314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5619314564Sdim (__v8di)_mm512_slli_epi64(__A, __B), 5620314564Sdim (__v8di)_mm512_setzero_si512()); 5621314564Sdim} 5622309124Sdim 5623314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5624314564Sdim_mm512_srli_epi32(__m512i __A, int __B) 5625314564Sdim{ 5626314564Sdim return 
(__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B); 5627314564Sdim} 5628309124Sdim 5629314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5630314564Sdim_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) 5631314564Sdim{ 5632314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5633314564Sdim (__v16si)_mm512_srli_epi32(__A, __B), 5634314564Sdim (__v16si)__W); 5635314564Sdim} 5636309124Sdim 5637314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5638314564Sdim_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) { 5639314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5640314564Sdim (__v16si)_mm512_srli_epi32(__A, __B), 5641314564Sdim (__v16si)_mm512_setzero_si512()); 5642314564Sdim} 5643309124Sdim 5644314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5645314564Sdim_mm512_srli_epi64(__m512i __A, int __B) 5646314564Sdim{ 5647314564Sdim return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B); 5648314564Sdim} 5649309124Sdim 5650314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5651314564Sdim_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) 5652314564Sdim{ 5653314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5654314564Sdim (__v8di)_mm512_srli_epi64(__A, __B), 5655314564Sdim (__v8di)__W); 5656314564Sdim} 5657309124Sdim 5658314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5659314564Sdim_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B) 5660314564Sdim{ 5661314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5662314564Sdim (__v8di)_mm512_srli_epi64(__A, __B), 5663314564Sdim (__v8di)_mm512_setzero_si512()); 5664314564Sdim} 5665309124Sdim 5666309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5667309124Sdim_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) 5668309124Sdim{ 5669309124Sdim return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 5670309124Sdim (__v16si) __W, 
5671309124Sdim (__mmask16) __U); 5672309124Sdim} 5673309124Sdim 5674309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5675309124Sdim_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P) 5676309124Sdim{ 5677309124Sdim return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 5678309124Sdim (__v16si) 5679309124Sdim _mm512_setzero_si512 (), 5680309124Sdim (__mmask16) __U); 5681309124Sdim} 5682309124Sdim 5683309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 5684309124Sdim_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A) 5685309124Sdim{ 5686309124Sdim __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A, 5687309124Sdim (__mmask16) __U); 5688309124Sdim} 5689309124Sdim 5690309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5691309124Sdim_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 5692309124Sdim{ 5693309124Sdim return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, 5694309124Sdim (__v16si) __A, 5695309124Sdim (__v16si) __W); 5696309124Sdim} 5697309124Sdim 5698309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5699309124Sdim_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) 5700309124Sdim{ 5701309124Sdim return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, 5702309124Sdim (__v16si) __A, 5703309124Sdim (__v16si) _mm512_setzero_si512 ()); 5704309124Sdim} 5705309124Sdim 5706309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5707309124Sdim_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 5708309124Sdim{ 5709309124Sdim return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, 5710309124Sdim (__v8di) __A, 5711309124Sdim (__v8di) __W); 5712309124Sdim} 5713309124Sdim 5714309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5715309124Sdim_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) 5716309124Sdim{ 5717309124Sdim return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, 5718309124Sdim (__v8di) __A, 5719309124Sdim (__v8di) _mm512_setzero_si512 ()); 
5720309124Sdim} 5721309124Sdim 5722309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5723309124Sdim_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P) 5724309124Sdim{ 5725309124Sdim return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, 5726309124Sdim (__v8di) __W, 5727309124Sdim (__mmask8) __U); 5728309124Sdim} 5729309124Sdim 5730309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5731309124Sdim_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P) 5732309124Sdim{ 5733309124Sdim return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, 5734309124Sdim (__v8di) 5735309124Sdim _mm512_setzero_si512 (), 5736309124Sdim (__mmask8) __U); 5737309124Sdim} 5738309124Sdim 5739309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 5740309124Sdim_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) 5741309124Sdim{ 5742309124Sdim __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A, 5743309124Sdim (__mmask8) __U); 5744309124Sdim} 5745309124Sdim 5746309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 5747309124Sdim_mm512_movedup_pd (__m512d __A) 5748309124Sdim{ 5749309124Sdim return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A, 5750309124Sdim 0, 0, 2, 2, 4, 4, 6, 6); 5751309124Sdim} 5752309124Sdim 5753309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 5754309124Sdim_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A) 5755309124Sdim{ 5756309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 5757309124Sdim (__v8df)_mm512_movedup_pd(__A), 5758309124Sdim (__v8df)__W); 5759309124Sdim} 5760309124Sdim 5761309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 5762309124Sdim_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) 5763309124Sdim{ 5764309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 5765309124Sdim (__v8df)_mm512_movedup_pd(__A), 5766309124Sdim (__v8df)_mm512_setzero_pd()); 5767309124Sdim} 5768309124Sdim 5769309124Sdim#define 
_mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \ 5770309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5771309124Sdim (__v8df)(__m512d)(B), \ 5772309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 5773309124Sdim (__mmask8)-1, (int)(R)); }) 5774309124Sdim 5775309124Sdim#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \ 5776309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5777309124Sdim (__v8df)(__m512d)(B), \ 5778309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 5779309124Sdim (__mmask8)(U), (int)(R)); }) 5780309124Sdim 5781309124Sdim#define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \ 5782309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5783309124Sdim (__v8df)(__m512d)(B), \ 5784309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 5785309124Sdim (__mmask8)-1, \ 5786309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5787309124Sdim 5788309124Sdim#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ 5789309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5790309124Sdim (__v8df)(__m512d)(B), \ 5791309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 5792309124Sdim (__mmask8)(U), \ 5793309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5794309124Sdim 5795309124Sdim#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \ 5796309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 5797309124Sdim (__v8df)(__m512d)(B), \ 5798309124Sdim (__v8di)(__m512i)(C), \ 5799309124Sdim (int)(imm), (__mmask8)(U), \ 5800309124Sdim (int)(R)); }) 5801309124Sdim 5802309124Sdim#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ 5803309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 5804309124Sdim (__v8df)(__m512d)(B), \ 5805309124Sdim (__v8di)(__m512i)(C), \ 5806309124Sdim (int)(imm), (__mmask8)(U), \ 5807309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5808309124Sdim 
/*
 * Packed single-precision fixupimm macros. A and B are cast to __v16sf, C to
 * __v16si and imm to int. The _round_ forms forward R as the last argument;
 * the others pass _MM_FROUND_CUR_DIRECTION. The unmasked forms use an
 * all-ones mask, and the maskz forms call the separate ..._maskz builtin.
 */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), (int)(imm), \
      (__mmask16)-1, (int)(R)); })

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), (int)(imm), \
      (__mmask16)(U), (int)(R)); })

#define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), (int)(imm), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), (int)(imm), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U), \
      (int)(R)); })

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })
5848309124Sdim 5849309124Sdim#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \ 5850309124Sdim (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5851309124Sdim (__v2df)(__m128d)(B), \ 5852309124Sdim (__v2di)(__m128i)(C), (int)(imm), \ 5853309124Sdim (__mmask8)-1, (int)(R)); }) 5854309124Sdim 5855309124Sdim#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \ 5856309124Sdim (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5857309124Sdim (__v2df)(__m128d)(B), \ 5858309124Sdim (__v2di)(__m128i)(C), (int)(imm), \ 5859309124Sdim (__mmask8)(U), (int)(R)); }) 5860309124Sdim 5861309124Sdim#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \ 5862309124Sdim (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5863309124Sdim (__v2df)(__m128d)(B), \ 5864309124Sdim (__v2di)(__m128i)(C), (int)(imm), \ 5865309124Sdim (__mmask8)-1, \ 5866309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5867309124Sdim 5868309124Sdim#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \ 5869309124Sdim (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5870309124Sdim (__v2df)(__m128d)(B), \ 5871309124Sdim (__v2di)(__m128i)(C), (int)(imm), \ 5872309124Sdim (__mmask8)(U), \ 5873309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5874309124Sdim 5875309124Sdim#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \ 5876309124Sdim (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 5877309124Sdim (__v2df)(__m128d)(B), \ 5878309124Sdim (__v2di)(__m128i)(C), (int)(imm), \ 5879309124Sdim (__mmask8)(U), (int)(R)); }) 5880309124Sdim 5881309124Sdim#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \ 5882309124Sdim (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 5883309124Sdim (__v2df)(__m128d)(B), \ 5884309124Sdim (__v2di)(__m128i)(C), (int)(imm), \ 5885309124Sdim (__mmask8)(U), \ 5886309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5887309124Sdim 5888309124Sdim#define 
_mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \ 5889309124Sdim (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5890309124Sdim (__v4sf)(__m128)(B), \ 5891309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5892309124Sdim (__mmask8)-1, (int)(R)); }) 5893309124Sdim 5894309124Sdim#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \ 5895309124Sdim (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5896309124Sdim (__v4sf)(__m128)(B), \ 5897309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5898309124Sdim (__mmask8)(U), (int)(R)); }) 5899309124Sdim 5900309124Sdim#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \ 5901309124Sdim (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5902309124Sdim (__v4sf)(__m128)(B), \ 5903309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5904309124Sdim (__mmask8)-1, \ 5905309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5906309124Sdim 5907309124Sdim#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \ 5908309124Sdim (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5909309124Sdim (__v4sf)(__m128)(B), \ 5910309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5911309124Sdim (__mmask8)(U), \ 5912309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5913309124Sdim 5914309124Sdim#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \ 5915309124Sdim (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 5916309124Sdim (__v4sf)(__m128)(B), \ 5917309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5918309124Sdim (__mmask8)(U), (int)(R)); }) 5919309124Sdim 5920309124Sdim#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \ 5921309124Sdim (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 5922309124Sdim (__v4sf)(__m128)(B), \ 5923309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5924309124Sdim (__mmask8)(U), \ 5925309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5926309124Sdim 5927309124Sdim#define _mm_getexp_round_sd(A, B, R) __extension__ ({ \ 5928309124Sdim 
(__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5929309124Sdim (__v2df)(__m128d)(B), \ 5930309124Sdim (__v2df)_mm_setzero_pd(), \ 5931309124Sdim (__mmask8)-1, (int)(R)); }) 5932309124Sdim 5933309124Sdim 5934309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 5935309124Sdim_mm_getexp_sd (__m128d __A, __m128d __B) 5936309124Sdim{ 5937309124Sdim return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A, 5938309124Sdim (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); 5939309124Sdim} 5940309124Sdim 5941309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 5942309124Sdim_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 5943309124Sdim{ 5944309124Sdim return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, 5945309124Sdim (__v2df) __B, 5946309124Sdim (__v2df) __W, 5947309124Sdim (__mmask8) __U, 5948309124Sdim _MM_FROUND_CUR_DIRECTION); 5949309124Sdim} 5950309124Sdim 5951309124Sdim#define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\ 5952309124Sdim (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5953309124Sdim (__v2df)(__m128d)(B), \ 5954309124Sdim (__v2df)(__m128d)(W), \ 5955309124Sdim (__mmask8)(U), (int)(R)); }) 5956309124Sdim 5957309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 5958309124Sdim_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) 5959309124Sdim{ 5960309124Sdim return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, 5961309124Sdim (__v2df) __B, 5962309124Sdim (__v2df) _mm_setzero_pd (), 5963309124Sdim (__mmask8) __U, 5964309124Sdim _MM_FROUND_CUR_DIRECTION); 5965309124Sdim} 5966309124Sdim 5967309124Sdim#define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\ 5968309124Sdim (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5969309124Sdim (__v2df)(__m128d)(B), \ 5970309124Sdim (__v2df)_mm_setzero_pd(), \ 5971309124Sdim (__mmask8)(U), (int)(R)); }) 5972309124Sdim 
5973309124Sdim#define _mm_getexp_round_ss(A, B, R) __extension__ ({ \ 5974309124Sdim (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5975309124Sdim (__v4sf)(__m128)(B), \ 5976309124Sdim (__v4sf)_mm_setzero_ps(), \ 5977309124Sdim (__mmask8)-1, (int)(R)); }) 5978309124Sdim 5979309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 5980309124Sdim_mm_getexp_ss (__m128 __A, __m128 __B) 5981309124Sdim{ 5982309124Sdim return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 5983309124Sdim (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); 5984309124Sdim} 5985309124Sdim 5986309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 5987309124Sdim_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 5988309124Sdim{ 5989309124Sdim return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 5990309124Sdim (__v4sf) __B, 5991309124Sdim (__v4sf) __W, 5992309124Sdim (__mmask8) __U, 5993309124Sdim _MM_FROUND_CUR_DIRECTION); 5994309124Sdim} 5995309124Sdim 5996309124Sdim#define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\ 5997309124Sdim (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5998309124Sdim (__v4sf)(__m128)(B), \ 5999309124Sdim (__v4sf)(__m128)(W), \ 6000309124Sdim (__mmask8)(U), (int)(R)); }) 6001309124Sdim 6002309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 6003309124Sdim_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) 6004309124Sdim{ 6005309124Sdim return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 6006309124Sdim (__v4sf) __B, 6007309124Sdim (__v4sf) _mm_setzero_pd (), 6008309124Sdim (__mmask8) __U, 6009309124Sdim _MM_FROUND_CUR_DIRECTION); 6010309124Sdim} 6011309124Sdim 6012309124Sdim#define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\ 6013309124Sdim (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 6014309124Sdim (__v4sf)(__m128)(B), \ 6015309124Sdim (__v4sf)_mm_setzero_ps(), \ 
6016309124Sdim (__mmask8)(U), (int)(R)); }) 6017309124Sdim 6018309124Sdim#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \ 6019309124Sdim (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 6020309124Sdim (__v2df)(__m128d)(B), \ 6021309124Sdim (int)(((D)<<2) | (C)), \ 6022309124Sdim (__v2df)_mm_setzero_pd(), \ 6023309124Sdim (__mmask8)-1, (int)(R)); }) 6024309124Sdim 6025309124Sdim#define _mm_getmant_sd(A, B, C, D) __extension__ ({ \ 6026309124Sdim (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 6027309124Sdim (__v2df)(__m128d)(B), \ 6028309124Sdim (int)(((D)<<2) | (C)), \ 6029309124Sdim (__v2df)_mm_setzero_pd(), \ 6030309124Sdim (__mmask8)-1, \ 6031309124Sdim _MM_FROUND_CUR_DIRECTION); }) 6032309124Sdim 6033309124Sdim#define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\ 6034309124Sdim (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 6035309124Sdim (__v2df)(__m128d)(B), \ 6036309124Sdim (int)(((D)<<2) | (C)), \ 6037309124Sdim (__v2df)(__m128d)(W), \ 6038309124Sdim (__mmask8)(U), \ 6039309124Sdim _MM_FROUND_CUR_DIRECTION); }) 6040309124Sdim 6041309124Sdim#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R)({\ 6042309124Sdim (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 6043309124Sdim (__v2df)(__m128d)(B), \ 6044309124Sdim (int)(((D)<<2) | (C)), \ 6045309124Sdim (__v2df)(__m128d)(W), \ 6046309124Sdim (__mmask8)(U), (int)(R)); }) 6047309124Sdim 6048309124Sdim#define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\ 6049309124Sdim (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 6050309124Sdim (__v2df)(__m128d)(B), \ 6051309124Sdim (int)(((D)<<2) | (C)), \ 6052309124Sdim (__v2df)_mm_setzero_pd(), \ 6053309124Sdim (__mmask8)(U), \ 6054309124Sdim _MM_FROUND_CUR_DIRECTION); }) 6055309124Sdim 6056309124Sdim#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\ 6057309124Sdim 
(__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 6058309124Sdim (__v2df)(__m128d)(B), \ 6059309124Sdim (int)(((D)<<2) | (C)), \ 6060309124Sdim (__v2df)_mm_setzero_pd(), \ 6061309124Sdim (__mmask8)(U), (int)(R)); }) 6062309124Sdim 6063309124Sdim#define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \ 6064309124Sdim (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 6065309124Sdim (__v4sf)(__m128)(B), \ 6066309124Sdim (int)(((D)<<2) | (C)), \ 6067309124Sdim (__v4sf)_mm_setzero_ps(), \ 6068309124Sdim (__mmask8)-1, (int)(R)); }) 6069309124Sdim 6070309124Sdim#define _mm_getmant_ss(A, B, C, D) __extension__ ({ \ 6071309124Sdim (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 6072309124Sdim (__v4sf)(__m128)(B), \ 6073309124Sdim (int)(((D)<<2) | (C)), \ 6074309124Sdim (__v4sf)_mm_setzero_ps(), \ 6075309124Sdim (__mmask8)-1, \ 6076309124Sdim _MM_FROUND_CUR_DIRECTION); }) 6077309124Sdim 6078309124Sdim#define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\ 6079309124Sdim (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 6080309124Sdim (__v4sf)(__m128)(B), \ 6081309124Sdim (int)(((D)<<2) | (C)), \ 6082309124Sdim (__v4sf)(__m128)(W), \ 6083309124Sdim (__mmask8)(U), \ 6084309124Sdim _MM_FROUND_CUR_DIRECTION); }) 6085309124Sdim 6086309124Sdim#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R)({\ 6087309124Sdim (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 6088309124Sdim (__v4sf)(__m128)(B), \ 6089309124Sdim (int)(((D)<<2) | (C)), \ 6090309124Sdim (__v4sf)(__m128)(W), \ 6091309124Sdim (__mmask8)(U), (int)(R)); }) 6092309124Sdim 6093309124Sdim#define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\ 6094309124Sdim (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 6095309124Sdim (__v4sf)(__m128)(B), \ 6096309124Sdim (int)(((D)<<2) | (C)), \ 6097309124Sdim (__v4sf)_mm_setzero_pd(), \ 6098309124Sdim (__mmask8)(U), \ 6099309124Sdim _MM_FROUND_CUR_DIRECTION); 
}) 6100309124Sdim 6101309124Sdim#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\ 6102309124Sdim (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 6103309124Sdim (__v4sf)(__m128)(B), \ 6104309124Sdim (int)(((D)<<2) | (C)), \ 6105309124Sdim (__v4sf)_mm_setzero_ps(), \ 6106309124Sdim (__mmask8)(U), (int)(R)); }) 6107309124Sdim 6108309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 6109309124Sdim_mm512_kmov (__mmask16 __A) 6110309124Sdim{ 6111309124Sdim return __A; 6112309124Sdim} 6113309124Sdim 6114309124Sdim#define _mm_comi_round_sd(A, B, P, R) __extension__ ({\ 6115309124Sdim (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \ 6116309124Sdim (int)(P), (int)(R)); }) 6117309124Sdim 6118309124Sdim#define _mm_comi_round_ss(A, B, P, R) __extension__ ({\ 6119309124Sdim (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ 6120309124Sdim (int)(P), (int)(R)); }) 6121309124Sdim 6122314564Sdim#ifdef __x86_64__ 6123309124Sdim#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \ 6124309124Sdim (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); }) 6125314564Sdim#endif 6126309124Sdim 6127309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6128309124Sdim_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I, 6129309124Sdim __mmask16 __U, __m512i __B) 6130309124Sdim{ 6131309124Sdim return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A, 6132309124Sdim (__v16si) __I 6133309124Sdim /* idx */ , 6134309124Sdim (__v16si) __B, 6135309124Sdim (__mmask16) __U); 6136309124Sdim} 6137309124Sdim 6138309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6139314564Sdim_mm512_sll_epi32(__m512i __A, __m128i __B) 6140309124Sdim{ 6141314564Sdim return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B); 6142309124Sdim} 6143309124Sdim 6144309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6145314564Sdim_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, 
__m128i __B) 6146309124Sdim{ 6147314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6148314564Sdim (__v16si)_mm512_sll_epi32(__A, __B), 6149314564Sdim (__v16si)__W); 6150309124Sdim} 6151309124Sdim 6152309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6153314564Sdim_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) 6154309124Sdim{ 6155314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6156314564Sdim (__v16si)_mm512_sll_epi32(__A, __B), 6157314564Sdim (__v16si)_mm512_setzero_si512()); 6158309124Sdim} 6159309124Sdim 6160309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6161314564Sdim_mm512_sll_epi64(__m512i __A, __m128i __B) 6162309124Sdim{ 6163314564Sdim return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B); 6164309124Sdim} 6165309124Sdim 6166309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6167314564Sdim_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 6168309124Sdim{ 6169314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6170314564Sdim (__v8di)_mm512_sll_epi64(__A, __B), 6171314564Sdim (__v8di)__W); 6172309124Sdim} 6173309124Sdim 6174309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6175314564Sdim_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) 6176309124Sdim{ 6177314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6178314564Sdim (__v8di)_mm512_sll_epi64(__A, __B), 6179314564Sdim (__v8di)_mm512_setzero_si512()); 6180309124Sdim} 6181309124Sdim 6182309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6183314564Sdim_mm512_sllv_epi32(__m512i __X, __m512i __Y) 6184309124Sdim{ 6185314564Sdim return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y); 6186309124Sdim} 6187309124Sdim 6188309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6189314564Sdim_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 6190309124Sdim{ 6191314564Sdim return 
(__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6192314564Sdim (__v16si)_mm512_sllv_epi32(__X, __Y), 6193314564Sdim (__v16si)__W); 6194309124Sdim} 6195309124Sdim 6196309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6197314564Sdim_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) 6198309124Sdim{ 6199314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6200314564Sdim (__v16si)_mm512_sllv_epi32(__X, __Y), 6201314564Sdim (__v16si)_mm512_setzero_si512()); 6202309124Sdim} 6203309124Sdim 6204309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6205314564Sdim_mm512_sllv_epi64(__m512i __X, __m512i __Y) 6206309124Sdim{ 6207314564Sdim return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y); 6208309124Sdim} 6209309124Sdim 6210309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6211314564Sdim_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 6212309124Sdim{ 6213314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6214314564Sdim (__v8di)_mm512_sllv_epi64(__X, __Y), 6215314564Sdim (__v8di)__W); 6216309124Sdim} 6217309124Sdim 6218309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6219314564Sdim_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) 6220309124Sdim{ 6221314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6222314564Sdim (__v8di)_mm512_sllv_epi64(__X, __Y), 6223314564Sdim (__v8di)_mm512_setzero_si512()); 6224309124Sdim} 6225309124Sdim 6226309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6227314564Sdim_mm512_sra_epi32(__m512i __A, __m128i __B) 6228309124Sdim{ 6229314564Sdim return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B); 6230309124Sdim} 6231309124Sdim 6232309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6233314564Sdim_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 6234309124Sdim{ 6235314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6236314564Sdim 
(__v16si)_mm512_sra_epi32(__A, __B), 6237314564Sdim (__v16si)__W); 6238309124Sdim} 6239309124Sdim 6240309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6241314564Sdim_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) 6242309124Sdim{ 6243314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6244314564Sdim (__v16si)_mm512_sra_epi32(__A, __B), 6245314564Sdim (__v16si)_mm512_setzero_si512()); 6246309124Sdim} 6247309124Sdim 6248309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6249314564Sdim_mm512_sra_epi64(__m512i __A, __m128i __B) 6250309124Sdim{ 6251314564Sdim return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B); 6252309124Sdim} 6253309124Sdim 6254309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6255314564Sdim_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 6256309124Sdim{ 6257314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6258314564Sdim (__v8di)_mm512_sra_epi64(__A, __B), 6259314564Sdim (__v8di)__W); 6260309124Sdim} 6261309124Sdim 6262309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6263314564Sdim_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) 6264309124Sdim{ 6265314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6266314564Sdim (__v8di)_mm512_sra_epi64(__A, __B), 6267314564Sdim (__v8di)_mm512_setzero_si512()); 6268309124Sdim} 6269309124Sdim 6270309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6271314564Sdim_mm512_srav_epi32(__m512i __X, __m512i __Y) 6272309124Sdim{ 6273314564Sdim return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y); 6274309124Sdim} 6275309124Sdim 6276309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6277314564Sdim_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 6278309124Sdim{ 6279314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6280314564Sdim (__v16si)_mm512_srav_epi32(__X, __Y), 6281314564Sdim (__v16si)__W); 6282309124Sdim} 
6283309124Sdim 6284309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6285314564Sdim_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) 6286309124Sdim{ 6287314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6288314564Sdim (__v16si)_mm512_srav_epi32(__X, __Y), 6289314564Sdim (__v16si)_mm512_setzero_si512()); 6290309124Sdim} 6291309124Sdim 6292309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6293314564Sdim_mm512_srav_epi64(__m512i __X, __m512i __Y) 6294309124Sdim{ 6295314564Sdim return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y); 6296309124Sdim} 6297309124Sdim 6298309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6299314564Sdim_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 6300309124Sdim{ 6301314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6302314564Sdim (__v8di)_mm512_srav_epi64(__X, __Y), 6303314564Sdim (__v8di)__W); 6304309124Sdim} 6305309124Sdim 6306309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6307314564Sdim_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) 6308309124Sdim{ 6309314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6310314564Sdim (__v8di)_mm512_srav_epi64(__X, __Y), 6311314564Sdim (__v8di)_mm512_setzero_si512()); 6312309124Sdim} 6313309124Sdim 6314309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6315314564Sdim_mm512_srl_epi32(__m512i __A, __m128i __B) 6316309124Sdim{ 6317314564Sdim return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B); 6318309124Sdim} 6319309124Sdim 6320309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6321314564Sdim_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 6322309124Sdim{ 6323314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6324314564Sdim (__v16si)_mm512_srl_epi32(__A, __B), 6325314564Sdim (__v16si)__W); 6326309124Sdim} 6327309124Sdim 6328309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 
6329314564Sdim_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) 6330309124Sdim{ 6331314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6332314564Sdim (__v16si)_mm512_srl_epi32(__A, __B), 6333314564Sdim (__v16si)_mm512_setzero_si512()); 6334309124Sdim} 6335309124Sdim 6336309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6337314564Sdim_mm512_srl_epi64(__m512i __A, __m128i __B) 6338309124Sdim{ 6339314564Sdim return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B); 6340309124Sdim} 6341309124Sdim 6342309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6343314564Sdim_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 6344309124Sdim{ 6345314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6346314564Sdim (__v8di)_mm512_srl_epi64(__A, __B), 6347314564Sdim (__v8di)__W); 6348309124Sdim} 6349309124Sdim 6350309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6351314564Sdim_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) 6352309124Sdim{ 6353314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6354314564Sdim (__v8di)_mm512_srl_epi64(__A, __B), 6355314564Sdim (__v8di)_mm512_setzero_si512()); 6356309124Sdim} 6357309124Sdim 6358309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6359314564Sdim_mm512_srlv_epi32(__m512i __X, __m512i __Y) 6360309124Sdim{ 6361314564Sdim return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y); 6362309124Sdim} 6363309124Sdim 6364309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6365314564Sdim_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 6366309124Sdim{ 6367314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6368314564Sdim (__v16si)_mm512_srlv_epi32(__X, __Y), 6369314564Sdim (__v16si)__W); 6370309124Sdim} 6371309124Sdim 6372309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6373314564Sdim_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) 
6374309124Sdim{ 6375314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6376314564Sdim (__v16si)_mm512_srlv_epi32(__X, __Y), 6377314564Sdim (__v16si)_mm512_setzero_si512()); 6378309124Sdim} 6379309124Sdim 6380309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6381309124Sdim_mm512_srlv_epi64 (__m512i __X, __m512i __Y) 6382309124Sdim{ 6383314564Sdim return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y); 6384309124Sdim} 6385309124Sdim 6386309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6387314564Sdim_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 6388309124Sdim{ 6389314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6390314564Sdim (__v8di)_mm512_srlv_epi64(__X, __Y), 6391314564Sdim (__v8di)__W); 6392309124Sdim} 6393309124Sdim 6394309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6395314564Sdim_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) 6396309124Sdim{ 6397314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6398314564Sdim (__v8di)_mm512_srlv_epi64(__X, __Y), 6399314564Sdim (__v8di)_mm512_setzero_si512()); 6400309124Sdim} 6401309124Sdim 6402309124Sdim#define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ 6403309124Sdim (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ 6404309124Sdim (__v16si)(__m512i)(B), \ 6405309124Sdim (__v16si)(__m512i)(C), (int)(imm), \ 6406309124Sdim (__mmask16)-1); }) 6407309124Sdim 6408309124Sdim#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ 6409309124Sdim (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ 6410309124Sdim (__v16si)(__m512i)(B), \ 6411309124Sdim (__v16si)(__m512i)(C), (int)(imm), \ 6412309124Sdim (__mmask16)(U)); }) 6413309124Sdim 6414309124Sdim#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ 6415309124Sdim (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \ 6416309124Sdim 
(__v16si)(__m512i)(B), \ 6417309124Sdim (__v16si)(__m512i)(C), \ 6418309124Sdim (int)(imm), (__mmask16)(U)); }) 6419309124Sdim 6420309124Sdim#define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \ 6421309124Sdim (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ 6422309124Sdim (__v8di)(__m512i)(B), \ 6423309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 6424309124Sdim (__mmask8)-1); }) 6425309124Sdim 6426309124Sdim#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ 6427309124Sdim (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ 6428309124Sdim (__v8di)(__m512i)(B), \ 6429309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 6430309124Sdim (__mmask8)(U)); }) 6431309124Sdim 6432309124Sdim#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ 6433309124Sdim (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \ 6434309124Sdim (__v8di)(__m512i)(B), \ 6435309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 6436309124Sdim (__mmask8)(U)); }) 6437309124Sdim 6438314564Sdim#ifdef __x86_64__ 6439309124Sdim#define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \ 6440309124Sdim (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); }) 6441314564Sdim#endif 6442309124Sdim 6443309124Sdim#define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \ 6444309124Sdim (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); }) 6445309124Sdim 6446309124Sdim#define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \ 6447309124Sdim (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); }) 6448309124Sdim 6449309124Sdim#define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \ 6450309124Sdim (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); }) 6451309124Sdim 6452309124Sdimstatic __inline__ unsigned __DEFAULT_FN_ATTRS 6453309124Sdim_mm_cvtsd_u32 (__m128d __A) 6454309124Sdim{ 6455309124Sdim return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, 6456309124Sdim 
_MM_FROUND_CUR_DIRECTION); 6457309124Sdim} 6458309124Sdim 6459314564Sdim#ifdef __x86_64__ 6460309124Sdim#define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \ 6461309124Sdim (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \ 6462309124Sdim (int)(R)); }) 6463309124Sdim 6464309124Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 6465309124Sdim_mm_cvtsd_u64 (__m128d __A) 6466309124Sdim{ 6467309124Sdim return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) 6468309124Sdim __A, 6469309124Sdim _MM_FROUND_CUR_DIRECTION); 6470309124Sdim} 6471314564Sdim#endif 6472309124Sdim 6473309124Sdim#define _mm_cvt_roundss_si32(A, R) __extension__ ({ \ 6474309124Sdim (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); }) 6475309124Sdim 6476309124Sdim#define _mm_cvt_roundss_i32(A, R) __extension__ ({ \ 6477309124Sdim (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); }) 6478309124Sdim 6479314564Sdim#ifdef __x86_64__ 6480309124Sdim#define _mm_cvt_roundss_si64(A, R) __extension__ ({ \ 6481309124Sdim (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); }) 6482309124Sdim 6483309124Sdim#define _mm_cvt_roundss_i64(A, R) __extension__ ({ \ 6484309124Sdim (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); }) 6485314564Sdim#endif 6486309124Sdim 6487309124Sdim#define _mm_cvt_roundss_u32(A, R) __extension__ ({ \ 6488309124Sdim (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); }) 6489309124Sdim 6490309124Sdimstatic __inline__ unsigned __DEFAULT_FN_ATTRS 6491309124Sdim_mm_cvtss_u32 (__m128 __A) 6492309124Sdim{ 6493309124Sdim return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, 6494309124Sdim _MM_FROUND_CUR_DIRECTION); 6495309124Sdim} 6496309124Sdim 6497314564Sdim#ifdef __x86_64__ 6498309124Sdim#define _mm_cvt_roundss_u64(A, R) __extension__ ({ \ 6499309124Sdim (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \ 6500309124Sdim (int)(R)); }) 
6501309124Sdim 6502309124Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 6503309124Sdim_mm_cvtss_u64 (__m128 __A) 6504309124Sdim{ 6505309124Sdim return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) 6506309124Sdim __A, 6507309124Sdim _MM_FROUND_CUR_DIRECTION); 6508309124Sdim} 6509314564Sdim#endif 6510309124Sdim 6511309124Sdim#define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \ 6512309124Sdim (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); }) 6513309124Sdim 6514309124Sdim#define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \ 6515309124Sdim (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); }) 6516309124Sdim 6517309124Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 6518309124Sdim_mm_cvttsd_i32 (__m128d __A) 6519309124Sdim{ 6520309124Sdim return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, 6521309124Sdim _MM_FROUND_CUR_DIRECTION); 6522309124Sdim} 6523309124Sdim 6524314564Sdim#ifdef __x86_64__ 6525309124Sdim#define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \ 6526309124Sdim (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); }) 6527309124Sdim 6528309124Sdim#define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \ 6529309124Sdim (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); }) 6530309124Sdim 6531309124Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 6532309124Sdim_mm_cvttsd_i64 (__m128d __A) 6533309124Sdim{ 6534309124Sdim return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, 6535309124Sdim _MM_FROUND_CUR_DIRECTION); 6536309124Sdim} 6537314564Sdim#endif 6538309124Sdim 6539309124Sdim#define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \ 6540309124Sdim (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); }) 6541309124Sdim 6542309124Sdimstatic __inline__ unsigned __DEFAULT_FN_ATTRS 6543309124Sdim_mm_cvttsd_u32 (__m128d __A) 6544309124Sdim{ 6545309124Sdim return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, 6546309124Sdim 
_MM_FROUND_CUR_DIRECTION); 6547309124Sdim} 6548309124Sdim 6549314564Sdim#ifdef __x86_64__ 6550309124Sdim#define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \ 6551309124Sdim (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \ 6552309124Sdim (int)(R)); }) 6553309124Sdim 6554309124Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 6555309124Sdim_mm_cvttsd_u64 (__m128d __A) 6556309124Sdim{ 6557309124Sdim return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) 6558309124Sdim __A, 6559309124Sdim _MM_FROUND_CUR_DIRECTION); 6560309124Sdim} 6561314564Sdim#endif 6562309124Sdim 6563309124Sdim#define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \ 6564309124Sdim (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); }) 6565309124Sdim 6566309124Sdim#define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \ 6567309124Sdim (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); }) 6568309124Sdim 6569309124Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 6570309124Sdim_mm_cvttss_i32 (__m128 __A) 6571309124Sdim{ 6572309124Sdim return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, 6573309124Sdim _MM_FROUND_CUR_DIRECTION); 6574309124Sdim} 6575309124Sdim 6576314564Sdim#ifdef __x86_64__ 6577309124Sdim#define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \ 6578309124Sdim (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); }) 6579309124Sdim 6580309124Sdim#define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \ 6581309124Sdim (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); }) 6582309124Sdim 6583309124Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 6584309124Sdim_mm_cvttss_i64 (__m128 __A) 6585309124Sdim{ 6586309124Sdim return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, 6587309124Sdim _MM_FROUND_CUR_DIRECTION); 6588309124Sdim} 6589314564Sdim#endif 6590309124Sdim 6591309124Sdim#define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \ 6592309124Sdim (unsigned 
int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); }) 6593309124Sdim 6594309124Sdimstatic __inline__ unsigned __DEFAULT_FN_ATTRS 6595309124Sdim_mm_cvttss_u32 (__m128 __A) 6596309124Sdim{ 6597309124Sdim return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, 6598309124Sdim _MM_FROUND_CUR_DIRECTION); 6599309124Sdim} 6600309124Sdim 6601314564Sdim#ifdef __x86_64__ 6602309124Sdim#define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \ 6603309124Sdim (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \ 6604309124Sdim (int)(R)); }) 6605309124Sdim 6606309124Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 6607309124Sdim_mm_cvttss_u64 (__m128 __A) 6608309124Sdim{ 6609309124Sdim return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) 6610309124Sdim __A, 6611309124Sdim _MM_FROUND_CUR_DIRECTION); 6612309124Sdim} 6613314564Sdim#endif 6614309124Sdim 6615309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 6616309124Sdim_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U, 6617309124Sdim __m512d __B) 6618309124Sdim{ 6619309124Sdim return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A, 6620309124Sdim (__v8di) __I 6621309124Sdim /* idx */ , 6622309124Sdim (__v8df) __B, 6623309124Sdim (__mmask8) __U); 6624309124Sdim} 6625309124Sdim 6626309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 6627309124Sdim_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U, 6628309124Sdim __m512 __B) 6629309124Sdim{ 6630309124Sdim return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A, 6631309124Sdim (__v16si) __I 6632309124Sdim /* idx */ , 6633309124Sdim (__v16sf) __B, 6634309124Sdim (__mmask16) __U); 6635309124Sdim} 6636309124Sdim 6637309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6638309124Sdim_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I, 6639309124Sdim __mmask8 __U, __m512i __B) 6640309124Sdim{ 6641309124Sdim return (__m512i) __builtin_ia32_vpermi2varq512_mask 
((__v8di) __A, 6642309124Sdim (__v8di) __I 6643309124Sdim /* idx */ , 6644309124Sdim (__v8di) __B, 6645309124Sdim (__mmask8) __U); 6646309124Sdim} 6647309124Sdim 6648309124Sdim#define _mm512_permute_pd(X, C) __extension__ ({ \ 6649309124Sdim (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ 6650309124Sdim (__v8df)_mm512_undefined_pd(), \ 6651309124Sdim 0 + (((C) >> 0) & 0x1), \ 6652309124Sdim 0 + (((C) >> 1) & 0x1), \ 6653309124Sdim 2 + (((C) >> 2) & 0x1), \ 6654309124Sdim 2 + (((C) >> 3) & 0x1), \ 6655309124Sdim 4 + (((C) >> 4) & 0x1), \ 6656309124Sdim 4 + (((C) >> 5) & 0x1), \ 6657309124Sdim 6 + (((C) >> 6) & 0x1), \ 6658309124Sdim 6 + (((C) >> 7) & 0x1)); }) 6659309124Sdim 6660309124Sdim#define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \ 6661309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6662309124Sdim (__v8df)_mm512_permute_pd((X), (C)), \ 6663309124Sdim (__v8df)(__m512d)(W)); }) 6664309124Sdim 6665309124Sdim#define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \ 6666309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6667309124Sdim (__v8df)_mm512_permute_pd((X), (C)), \ 6668309124Sdim (__v8df)_mm512_setzero_pd()); }) 6669309124Sdim 6670309124Sdim#define _mm512_permute_ps(X, C) __extension__ ({ \ 6671309124Sdim (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \ 6672309124Sdim (__v16sf)_mm512_undefined_ps(), \ 6673309124Sdim 0 + (((C) >> 0) & 0x3), \ 6674309124Sdim 0 + (((C) >> 2) & 0x3), \ 6675309124Sdim 0 + (((C) >> 4) & 0x3), \ 6676309124Sdim 0 + (((C) >> 6) & 0x3), \ 6677309124Sdim 4 + (((C) >> 0) & 0x3), \ 6678309124Sdim 4 + (((C) >> 2) & 0x3), \ 6679309124Sdim 4 + (((C) >> 4) & 0x3), \ 6680309124Sdim 4 + (((C) >> 6) & 0x3), \ 6681309124Sdim 8 + (((C) >> 0) & 0x3), \ 6682309124Sdim 8 + (((C) >> 2) & 0x3), \ 6683309124Sdim 8 + (((C) >> 4) & 0x3), \ 6684309124Sdim 8 + (((C) >> 6) & 0x3), \ 6685309124Sdim 12 + (((C) >> 0) & 0x3), \ 6686309124Sdim 12 + (((C) >> 2) & 0x3), \ 6687309124Sdim 12 + (((C) >> 4) 
& 0x3), \ 6688309124Sdim 12 + (((C) >> 6) & 0x3)); }) 6689309124Sdim 6690309124Sdim#define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \ 6691309124Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6692309124Sdim (__v16sf)_mm512_permute_ps((X), (C)), \ 6693309124Sdim (__v16sf)(__m512)(W)); }) 6694309124Sdim 6695309124Sdim#define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \ 6696309124Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6697309124Sdim (__v16sf)_mm512_permute_ps((X), (C)), \ 6698309124Sdim (__v16sf)_mm512_setzero_ps()); }) 6699309124Sdim 6700309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 6701314564Sdim_mm512_permutevar_pd(__m512d __A, __m512i __C) 6702309124Sdim{ 6703314564Sdim return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C); 6704309124Sdim} 6705309124Sdim 6706309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 6707314564Sdim_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) 6708309124Sdim{ 6709314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 6710314564Sdim (__v8df)_mm512_permutevar_pd(__A, __C), 6711314564Sdim (__v8df)__W); 6712309124Sdim} 6713309124Sdim 6714309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 6715314564Sdim_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) 6716309124Sdim{ 6717314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 6718314564Sdim (__v8df)_mm512_permutevar_pd(__A, __C), 6719314564Sdim (__v8df)_mm512_setzero_pd()); 6720309124Sdim} 6721309124Sdim 6722309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 6723314564Sdim_mm512_permutevar_ps(__m512 __A, __m512i __C) 6724309124Sdim{ 6725314564Sdim return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C); 6726309124Sdim} 6727309124Sdim 6728309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 6729314564Sdim_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) 6730309124Sdim{ 6731314564Sdim 
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 6732314564Sdim (__v16sf)_mm512_permutevar_ps(__A, __C), 6733314564Sdim (__v16sf)__W); 6734309124Sdim} 6735309124Sdim 6736309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 6737314564Sdim_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) 6738309124Sdim{ 6739314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 6740314564Sdim (__v16sf)_mm512_permutevar_ps(__A, __C), 6741314564Sdim (__v16sf)_mm512_setzero_ps()); 6742309124Sdim} 6743309124Sdim 6744309124Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 6745309124Sdim_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) 6746309124Sdim{ 6747309124Sdim return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I 6748309124Sdim /* idx */ , 6749309124Sdim (__v8df) __A, 6750309124Sdim (__v8df) __B, 6751309124Sdim (__mmask8) -1); 6752309124Sdim} 6753309124Sdim 6754309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 6755309124Sdim_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B) 6756309124Sdim{ 6757309124Sdim return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I 6758309124Sdim /* idx */ , 6759309124Sdim (__v8df) __A, 6760309124Sdim (__v8df) __B, 6761309124Sdim (__mmask8) __U); 6762309124Sdim} 6763309124Sdim 6764309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 6765309124Sdim_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I, 6766309124Sdim __m512d __B) 6767309124Sdim{ 6768309124Sdim return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I 6769309124Sdim /* idx */ , 6770309124Sdim (__v8df) __A, 6771309124Sdim (__v8df) __B, 6772309124Sdim (__mmask8) __U); 6773309124Sdim} 6774309124Sdim 6775309124Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 6776309124Sdim_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) 6777309124Sdim{ 6778309124Sdim return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I 6779309124Sdim /* idx */ , 
6780309124Sdim (__v16sf) __A, 6781309124Sdim (__v16sf) __B, 6782309124Sdim (__mmask16) -1); 6783309124Sdim} 6784309124Sdim 6785309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 6786309124Sdim_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) 6787309124Sdim{ 6788309124Sdim return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I 6789309124Sdim /* idx */ , 6790309124Sdim (__v16sf) __A, 6791309124Sdim (__v16sf) __B, 6792309124Sdim (__mmask16) __U); 6793309124Sdim} 6794309124Sdim 6795309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 6796309124Sdim_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I, 6797309124Sdim __m512 __B) 6798309124Sdim{ 6799309124Sdim return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I 6800309124Sdim /* idx */ , 6801309124Sdim (__v16sf) __A, 6802309124Sdim (__v16sf) __B, 6803309124Sdim (__mmask16) __U); 6804309124Sdim} 6805309124Sdim 6806309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 6807309124Sdim_mm512_testn_epi32_mask (__m512i __A, __m512i __B) 6808309124Sdim{ 6809309124Sdim return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, 6810309124Sdim (__v16si) __B, 6811309124Sdim (__mmask16) -1); 6812309124Sdim} 6813309124Sdim 6814309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 6815309124Sdim_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 6816309124Sdim{ 6817309124Sdim return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, 6818309124Sdim (__v16si) __B, __U); 6819309124Sdim} 6820309124Sdim 6821309124Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 6822309124Sdim_mm512_testn_epi64_mask (__m512i __A, __m512i __B) 6823309124Sdim{ 6824309124Sdim return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, 6825309124Sdim (__v8di) __B, 6826309124Sdim (__mmask8) -1); 6827309124Sdim} 6828309124Sdim 6829309124Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 6830309124Sdim_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, 
__m512i __B) 6831309124Sdim{ 6832309124Sdim return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, 6833309124Sdim (__v8di) __B, __U); 6834309124Sdim} 6835309124Sdim 6836309124Sdim#define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \ 6837309124Sdim (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6838309124Sdim (__v8si)_mm256_undefined_si256(), \ 6839309124Sdim (__mmask8)-1, (int)(R)); }) 6840309124Sdim 6841309124Sdim#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \ 6842309124Sdim (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6843309124Sdim (__v8si)(__m256i)(W), \ 6844309124Sdim (__mmask8)(U), (int)(R)); }) 6845309124Sdim 6846309124Sdim#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \ 6847309124Sdim (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6848309124Sdim (__v8si)_mm256_setzero_si256(), \ 6849309124Sdim (__mmask8)(U), (int)(R)); }) 6850309124Sdim 6851309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 6852309124Sdim_mm512_cvttpd_epu32 (__m512d __A) 6853309124Sdim{ 6854309124Sdim return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 6855309124Sdim (__v8si) 6856309124Sdim _mm256_undefined_si256 (), 6857309124Sdim (__mmask8) -1, 6858309124Sdim _MM_FROUND_CUR_DIRECTION); 6859309124Sdim} 6860309124Sdim 6861309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 6862309124Sdim_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 6863309124Sdim{ 6864309124Sdim return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 6865309124Sdim (__v8si) __W, 6866309124Sdim (__mmask8) __U, 6867309124Sdim _MM_FROUND_CUR_DIRECTION); 6868309124Sdim} 6869309124Sdim 6870309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 6871309124Sdim_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) 6872309124Sdim{ 6873309124Sdim return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 6874309124Sdim (__v8si) 6875309124Sdim _mm256_setzero_si256 (), 
6876309124Sdim (__mmask8) __U, 6877309124Sdim _MM_FROUND_CUR_DIRECTION); 6878309124Sdim} 6879309124Sdim 6880309124Sdim#define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \ 6881309124Sdim (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6882309124Sdim (__v2df)(__m128d)(B), \ 6883309124Sdim (__v2df)_mm_setzero_pd(), \ 6884309124Sdim (__mmask8)-1, (int)(imm), \ 6885309124Sdim (int)(R)); }) 6886309124Sdim 6887309124Sdim#define _mm_roundscale_sd(A, B, imm) __extension__ ({ \ 6888309124Sdim (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6889309124Sdim (__v2df)(__m128d)(B), \ 6890309124Sdim (__v2df)_mm_setzero_pd(), \ 6891309124Sdim (__mmask8)-1, (int)(imm), \ 6892309124Sdim _MM_FROUND_CUR_DIRECTION); }) 6893309124Sdim 6894309124Sdim#define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \ 6895309124Sdim (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6896309124Sdim (__v2df)(__m128d)(B), \ 6897309124Sdim (__v2df)(__m128d)(W), \ 6898309124Sdim (__mmask8)(U), (int)(imm), \ 6899309124Sdim _MM_FROUND_CUR_DIRECTION); }) 6900309124Sdim 6901309124Sdim#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \ 6902309124Sdim (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6903309124Sdim (__v2df)(__m128d)(B), \ 6904309124Sdim (__v2df)(__m128d)(W), \ 6905309124Sdim (__mmask8)(U), (int)(I), \ 6906309124Sdim (int)(R)); }) 6907309124Sdim 6908309124Sdim#define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \ 6909309124Sdim (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6910309124Sdim (__v2df)(__m128d)(B), \ 6911309124Sdim (__v2df)_mm_setzero_pd(), \ 6912309124Sdim (__mmask8)(U), (int)(I), \ 6913309124Sdim _MM_FROUND_CUR_DIRECTION); }) 6914309124Sdim 6915309124Sdim#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \ 6916309124Sdim (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6917309124Sdim 
(__v2df)(__m128d)(B), \ 6918309124Sdim (__v2df)_mm_setzero_pd(), \ 6919309124Sdim (__mmask8)(U), (int)(I), \ 6920309124Sdim (int)(R)); }) 6921309124Sdim 6922309124Sdim#define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \ 6923309124Sdim (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6924309124Sdim (__v4sf)(__m128)(B), \ 6925309124Sdim (__v4sf)_mm_setzero_ps(), \ 6926309124Sdim (__mmask8)-1, (int)(imm), \ 6927309124Sdim (int)(R)); }) 6928309124Sdim 6929309124Sdim#define _mm_roundscale_ss(A, B, imm) __extension__ ({ \ 6930309124Sdim (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6931309124Sdim (__v4sf)(__m128)(B), \ 6932309124Sdim (__v4sf)_mm_setzero_ps(), \ 6933309124Sdim (__mmask8)-1, (int)(imm), \ 6934309124Sdim _MM_FROUND_CUR_DIRECTION); }) 6935309124Sdim 6936309124Sdim#define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \ 6937309124Sdim (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6938309124Sdim (__v4sf)(__m128)(B), \ 6939309124Sdim (__v4sf)(__m128)(W), \ 6940309124Sdim (__mmask8)(U), (int)(I), \ 6941309124Sdim _MM_FROUND_CUR_DIRECTION); }) 6942309124Sdim 6943309124Sdim#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \ 6944309124Sdim (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6945309124Sdim (__v4sf)(__m128)(B), \ 6946309124Sdim (__v4sf)(__m128)(W), \ 6947309124Sdim (__mmask8)(U), (int)(I), \ 6948309124Sdim (int)(R)); }) 6949309124Sdim 6950309124Sdim#define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \ 6951309124Sdim (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6952309124Sdim (__v4sf)(__m128)(B), \ 6953309124Sdim (__v4sf)_mm_setzero_ps(), \ 6954309124Sdim (__mmask8)(U), (int)(I), \ 6955309124Sdim _MM_FROUND_CUR_DIRECTION); }) 6956309124Sdim 6957309124Sdim#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \ 6958309124Sdim 
(__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6959309124Sdim (__v4sf)(__m128)(B), \ 6960309124Sdim (__v4sf)_mm_setzero_ps(), \ 6961309124Sdim (__mmask8)(U), (int)(I), \ 6962309124Sdim (int)(R)); }) 6963309124Sdim 6964309124Sdim#define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \ 6965309124Sdim (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6966309124Sdim (__v8df)(__m512d)(B), \ 6967309124Sdim (__v8df)_mm512_undefined_pd(), \ 6968309124Sdim (__mmask8)-1, (int)(R)); }) 6969309124Sdim 6970309124Sdim#define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \ 6971309124Sdim (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6972309124Sdim (__v8df)(__m512d)(B), \ 6973309124Sdim (__v8df)(__m512d)(W), \ 6974309124Sdim (__mmask8)(U), (int)(R)); }) 6975309124Sdim 6976309124Sdim#define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \ 6977309124Sdim (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6978309124Sdim (__v8df)(__m512d)(B), \ 6979309124Sdim (__v8df)_mm512_setzero_pd(), \ 6980309124Sdim (__mmask8)(U), (int)(R)); }) 6981309124Sdim 6982309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 6983309124Sdim_mm512_scalef_pd (__m512d __A, __m512d __B) 6984309124Sdim{ 6985309124Sdim return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 6986309124Sdim (__v8df) __B, 6987309124Sdim (__v8df) 6988309124Sdim _mm512_undefined_pd (), 6989309124Sdim (__mmask8) -1, 6990309124Sdim _MM_FROUND_CUR_DIRECTION); 6991309124Sdim} 6992309124Sdim 6993309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 6994309124Sdim_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 6995309124Sdim{ 6996309124Sdim return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 6997309124Sdim (__v8df) __B, 6998309124Sdim (__v8df) __W, 6999309124Sdim (__mmask8) __U, 7000309124Sdim _MM_FROUND_CUR_DIRECTION); 7001309124Sdim} 7002309124Sdim 7003309124Sdimstatic __inline__ __m512d 
__DEFAULT_FN_ATTRS 7004309124Sdim_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) 7005309124Sdim{ 7006309124Sdim return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 7007309124Sdim (__v8df) __B, 7008309124Sdim (__v8df) 7009309124Sdim _mm512_setzero_pd (), 7010309124Sdim (__mmask8) __U, 7011309124Sdim _MM_FROUND_CUR_DIRECTION); 7012309124Sdim} 7013309124Sdim 7014309124Sdim#define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \ 7015309124Sdim (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 7016309124Sdim (__v16sf)(__m512)(B), \ 7017309124Sdim (__v16sf)_mm512_undefined_ps(), \ 7018309124Sdim (__mmask16)-1, (int)(R)); }) 7019309124Sdim 7020309124Sdim#define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \ 7021309124Sdim (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 7022309124Sdim (__v16sf)(__m512)(B), \ 7023309124Sdim (__v16sf)(__m512)(W), \ 7024309124Sdim (__mmask16)(U), (int)(R)); }) 7025309124Sdim 7026309124Sdim#define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \ 7027309124Sdim (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 7028309124Sdim (__v16sf)(__m512)(B), \ 7029309124Sdim (__v16sf)_mm512_setzero_ps(), \ 7030309124Sdim (__mmask16)(U), (int)(R)); }) 7031309124Sdim 7032309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 7033309124Sdim_mm512_scalef_ps (__m512 __A, __m512 __B) 7034309124Sdim{ 7035309124Sdim return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 7036309124Sdim (__v16sf) __B, 7037309124Sdim (__v16sf) 7038309124Sdim _mm512_undefined_ps (), 7039309124Sdim (__mmask16) -1, 7040309124Sdim _MM_FROUND_CUR_DIRECTION); 7041309124Sdim} 7042309124Sdim 7043309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 7044309124Sdim_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 7045309124Sdim{ 7046309124Sdim return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 7047309124Sdim (__v16sf) __B, 7048309124Sdim (__v16sf) 
__W, 7049309124Sdim (__mmask16) __U, 7050309124Sdim _MM_FROUND_CUR_DIRECTION); 7051309124Sdim} 7052309124Sdim 7053309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 7054309124Sdim_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) 7055309124Sdim{ 7056309124Sdim return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 7057309124Sdim (__v16sf) __B, 7058309124Sdim (__v16sf) 7059309124Sdim _mm512_setzero_ps (), 7060309124Sdim (__mmask16) __U, 7061309124Sdim _MM_FROUND_CUR_DIRECTION); 7062309124Sdim} 7063309124Sdim 7064309124Sdim#define _mm_scalef_round_sd(A, B, R) __extension__ ({ \ 7065309124Sdim (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 7066309124Sdim (__v2df)(__m128d)(B), \ 7067309124Sdim (__v2df)_mm_setzero_pd(), \ 7068309124Sdim (__mmask8)-1, (int)(R)); }) 7069309124Sdim 7070309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 7071309124Sdim_mm_scalef_sd (__m128d __A, __m128d __B) 7072309124Sdim{ 7073309124Sdim return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A, 7074309124Sdim (__v2df)( __B), (__v2df) _mm_setzero_pd(), 7075309124Sdim (__mmask8) -1, 7076309124Sdim _MM_FROUND_CUR_DIRECTION); 7077309124Sdim} 7078309124Sdim 7079309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 7080309124Sdim_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 7081309124Sdim{ 7082309124Sdim return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, 7083309124Sdim (__v2df) __B, 7084309124Sdim (__v2df) __W, 7085309124Sdim (__mmask8) __U, 7086309124Sdim _MM_FROUND_CUR_DIRECTION); 7087309124Sdim} 7088309124Sdim 7089309124Sdim#define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \ 7090309124Sdim (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 7091309124Sdim (__v2df)(__m128d)(B), \ 7092309124Sdim (__v2df)(__m128d)(W), \ 7093309124Sdim (__mmask8)(U), (int)(R)); }) 7094309124Sdim 7095309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 
7096309124Sdim_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) 7097309124Sdim{ 7098309124Sdim return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, 7099309124Sdim (__v2df) __B, 7100309124Sdim (__v2df) _mm_setzero_pd (), 7101309124Sdim (__mmask8) __U, 7102309124Sdim _MM_FROUND_CUR_DIRECTION); 7103309124Sdim} 7104309124Sdim 7105309124Sdim#define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \ 7106309124Sdim (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 7107309124Sdim (__v2df)(__m128d)(B), \ 7108309124Sdim (__v2df)_mm_setzero_pd(), \ 7109309124Sdim (__mmask8)(U), (int)(R)); }) 7110309124Sdim 7111309124Sdim#define _mm_scalef_round_ss(A, B, R) __extension__ ({ \ 7112309124Sdim (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 7113309124Sdim (__v4sf)(__m128)(B), \ 7114309124Sdim (__v4sf)_mm_setzero_ps(), \ 7115309124Sdim (__mmask8)-1, (int)(R)); }) 7116309124Sdim 7117309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 7118309124Sdim_mm_scalef_ss (__m128 __A, __m128 __B) 7119309124Sdim{ 7120309124Sdim return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A, 7121309124Sdim (__v4sf)( __B), (__v4sf) _mm_setzero_ps(), 7122309124Sdim (__mmask8) -1, 7123309124Sdim _MM_FROUND_CUR_DIRECTION); 7124309124Sdim} 7125309124Sdim 7126309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 7127309124Sdim_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 7128309124Sdim{ 7129309124Sdim return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, 7130309124Sdim (__v4sf) __B, 7131309124Sdim (__v4sf) __W, 7132309124Sdim (__mmask8) __U, 7133309124Sdim _MM_FROUND_CUR_DIRECTION); 7134309124Sdim} 7135309124Sdim 7136309124Sdim#define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \ 7137309124Sdim (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 7138309124Sdim (__v4sf)(__m128)(B), \ 7139309124Sdim (__v4sf)(__m128)(W), \ 7140309124Sdim (__mmask8)(U), (int)(R)); }) 
7141309124Sdim 7142309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 7143309124Sdim_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) 7144309124Sdim{ 7145309124Sdim return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, 7146309124Sdim (__v4sf) __B, 7147309124Sdim (__v4sf) _mm_setzero_ps (), 7148309124Sdim (__mmask8) __U, 7149309124Sdim _MM_FROUND_CUR_DIRECTION); 7150309124Sdim} 7151309124Sdim 7152309124Sdim#define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \ 7153309124Sdim (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 7154309124Sdim (__v4sf)(__m128)(B), \ 7155309124Sdim (__v4sf)_mm_setzero_ps(), \ 7156309124Sdim (__mmask8)(U), \ 7157309124Sdim _MM_FROUND_CUR_DIRECTION); }) 7158309124Sdim 7159314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7160314564Sdim_mm512_srai_epi32(__m512i __A, int __B) 7161314564Sdim{ 7162314564Sdim return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B); 7163314564Sdim} 7164309124Sdim 7165314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7166314564Sdim_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) 7167314564Sdim{ 7168314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ 7169314564Sdim (__v16si)_mm512_srai_epi32(__A, __B), \ 7170314564Sdim (__v16si)__W); 7171314564Sdim} 7172309124Sdim 7173314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7174314564Sdim_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) { 7175314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ 7176314564Sdim (__v16si)_mm512_srai_epi32(__A, __B), \ 7177314564Sdim (__v16si)_mm512_setzero_si512()); 7178314564Sdim} 7179309124Sdim 7180314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7181314564Sdim_mm512_srai_epi64(__m512i __A, int __B) 7182314564Sdim{ 7183314564Sdim return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B); 7184314564Sdim} 7185309124Sdim 7186314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 
7187314564Sdim_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) 7188314564Sdim{ 7189314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ 7190314564Sdim (__v8di)_mm512_srai_epi64(__A, __B), \ 7191314564Sdim (__v8di)__W); 7192314564Sdim} 7193309124Sdim 7194314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7195314564Sdim_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B) 7196314564Sdim{ 7197314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ 7198314564Sdim (__v8di)_mm512_srai_epi64(__A, __B), \ 7199314564Sdim (__v8di)_mm512_setzero_si512()); 7200314564Sdim} 7201309124Sdim 7202309124Sdim#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \ 7203309124Sdim (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \ 7204309124Sdim (__v16sf)(__m512)(B), (int)(imm), \ 7205309124Sdim (__v16sf)_mm512_undefined_ps(), \ 7206309124Sdim (__mmask16)-1); }) 7207309124Sdim 7208309124Sdim#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \ 7209309124Sdim (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \ 7210309124Sdim (__v16sf)(__m512)(B), (int)(imm), \ 7211309124Sdim (__v16sf)(__m512)(W), \ 7212309124Sdim (__mmask16)(U)); }) 7213309124Sdim 7214309124Sdim#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \ 7215309124Sdim (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \ 7216309124Sdim (__v16sf)(__m512)(B), (int)(imm), \ 7217309124Sdim (__v16sf)_mm512_setzero_ps(), \ 7218309124Sdim (__mmask16)(U)); }) 7219309124Sdim 7220309124Sdim#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \ 7221309124Sdim (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \ 7222309124Sdim (__v8df)(__m512d)(B), (int)(imm), \ 7223309124Sdim (__v8df)_mm512_undefined_pd(), \ 7224309124Sdim (__mmask8)-1); }) 7225309124Sdim 7226309124Sdim#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \ 7227309124Sdim 
(__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \ 7228309124Sdim (__v8df)(__m512d)(B), (int)(imm), \ 7229309124Sdim (__v8df)(__m512d)(W), \ 7230309124Sdim (__mmask8)(U)); }) 7231309124Sdim 7232309124Sdim#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \ 7233309124Sdim (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \ 7234309124Sdim (__v8df)(__m512d)(B), (int)(imm), \ 7235309124Sdim (__v8df)_mm512_setzero_pd(), \ 7236309124Sdim (__mmask8)(U)); }) 7237309124Sdim 7238309124Sdim#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \ 7239309124Sdim (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \ 7240309124Sdim (__v16si)(__m512i)(B), (int)(imm), \ 7241309124Sdim (__v16si)_mm512_setzero_si512(), \ 7242309124Sdim (__mmask16)-1); }) 7243309124Sdim 7244309124Sdim#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \ 7245309124Sdim (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \ 7246309124Sdim (__v16si)(__m512i)(B), (int)(imm), \ 7247309124Sdim (__v16si)(__m512i)(W), \ 7248309124Sdim (__mmask16)(U)); }) 7249309124Sdim 7250309124Sdim#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \ 7251309124Sdim (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \ 7252309124Sdim (__v16si)(__m512i)(B), (int)(imm), \ 7253309124Sdim (__v16si)_mm512_setzero_si512(), \ 7254309124Sdim (__mmask16)(U)); }) 7255309124Sdim 7256309124Sdim#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \ 7257309124Sdim (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \ 7258309124Sdim (__v8di)(__m512i)(B), (int)(imm), \ 7259309124Sdim (__v8di)_mm512_setzero_si512(), \ 7260309124Sdim (__mmask8)-1); }) 7261309124Sdim 7262309124Sdim#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \ 7263309124Sdim (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \ 7264309124Sdim (__v8di)(__m512i)(B), (int)(imm), \ 7265309124Sdim (__v8di)(__m512i)(W), \ 
7266309124Sdim (__mmask8)(U)); }) 7267309124Sdim 7268309124Sdim#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \ 7269309124Sdim (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \ 7270309124Sdim (__v8di)(__m512i)(B), (int)(imm), \ 7271309124Sdim (__v8di)_mm512_setzero_si512(), \ 7272309124Sdim (__mmask8)(U)); }) 7273309124Sdim 7274309124Sdim#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \ 7275309124Sdim (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ 7276309124Sdim (__v8df)(__m512d)(B), \ 7277309124Sdim 0 + (((M) >> 0) & 0x1), \ 7278309124Sdim 8 + (((M) >> 1) & 0x1), \ 7279309124Sdim 2 + (((M) >> 2) & 0x1), \ 7280309124Sdim 10 + (((M) >> 3) & 0x1), \ 7281309124Sdim 4 + (((M) >> 4) & 0x1), \ 7282309124Sdim 12 + (((M) >> 5) & 0x1), \ 7283309124Sdim 6 + (((M) >> 6) & 0x1), \ 7284309124Sdim 14 + (((M) >> 7) & 0x1)); }) 7285309124Sdim 7286309124Sdim#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ 7287309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7288309124Sdim (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ 7289309124Sdim (__v8df)(__m512d)(W)); }) 7290309124Sdim 7291309124Sdim#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ 7292309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7293309124Sdim (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ 7294309124Sdim (__v8df)_mm512_setzero_pd()); }) 7295309124Sdim 7296309124Sdim#define _mm512_shuffle_ps(A, B, M) __extension__ ({ \ 7297309124Sdim (__m512d)__builtin_shufflevector((__v16sf)(__m512)(A), \ 7298309124Sdim (__v16sf)(__m512)(B), \ 7299309124Sdim 0 + (((M) >> 0) & 0x3), \ 7300309124Sdim 0 + (((M) >> 2) & 0x3), \ 7301309124Sdim 16 + (((M) >> 4) & 0x3), \ 7302309124Sdim 16 + (((M) >> 6) & 0x3), \ 7303309124Sdim 4 + (((M) >> 0) & 0x3), \ 7304309124Sdim 4 + (((M) >> 2) & 0x3), \ 7305309124Sdim 20 + (((M) >> 4) & 0x3), \ 7306309124Sdim 20 + (((M) >> 6) & 0x3), \ 7307309124Sdim 8 + (((M) >> 0) & 0x3), \ 7308309124Sdim 8 + (((M) >> 2) 
& 0x3), \ 7309309124Sdim 24 + (((M) >> 4) & 0x3), \ 7310309124Sdim 24 + (((M) >> 6) & 0x3), \ 7311309124Sdim 12 + (((M) >> 0) & 0x3), \ 7312309124Sdim 12 + (((M) >> 2) & 0x3), \ 7313309124Sdim 28 + (((M) >> 4) & 0x3), \ 7314309124Sdim 28 + (((M) >> 6) & 0x3)); }) 7315309124Sdim 7316309124Sdim#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ 7317309124Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 7318309124Sdim (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ 7319309124Sdim (__v16sf)(__m512)(W)); }) 7320309124Sdim 7321309124Sdim#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ 7322309124Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 7323309124Sdim (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ 7324309124Sdim (__v16sf)_mm512_setzero_ps()); }) 7325309124Sdim 7326309124Sdim#define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \ 7327309124Sdim (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 7328309124Sdim (__v2df)(__m128d)(B), \ 7329309124Sdim (__v2df)_mm_setzero_pd(), \ 7330309124Sdim (__mmask8)-1, (int)(R)); }) 7331309124Sdim 7332309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 7333309124Sdim_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 7334309124Sdim{ 7335309124Sdim return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, 7336309124Sdim (__v2df) __B, 7337309124Sdim (__v2df) __W, 7338309124Sdim (__mmask8) __U, 7339309124Sdim _MM_FROUND_CUR_DIRECTION); 7340309124Sdim} 7341309124Sdim 7342309124Sdim#define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \ 7343309124Sdim (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 7344309124Sdim (__v2df)(__m128d)(B), \ 7345309124Sdim (__v2df)(__m128d)(W), \ 7346309124Sdim (__mmask8)(U), (int)(R)); }) 7347309124Sdim 7348309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 7349309124Sdim_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) 7350309124Sdim{ 7351309124Sdim return (__m128d) 
__builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, 7352309124Sdim (__v2df) __B, 7353309124Sdim (__v2df) _mm_setzero_pd (), 7354309124Sdim (__mmask8) __U, 7355309124Sdim _MM_FROUND_CUR_DIRECTION); 7356309124Sdim} 7357309124Sdim 7358309124Sdim#define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \ 7359309124Sdim (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 7360309124Sdim (__v2df)(__m128d)(B), \ 7361309124Sdim (__v2df)_mm_setzero_pd(), \ 7362309124Sdim (__mmask8)(U), (int)(R)); }) 7363309124Sdim 7364309124Sdim#define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \ 7365309124Sdim (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 7366309124Sdim (__v4sf)(__m128)(B), \ 7367309124Sdim (__v4sf)_mm_setzero_ps(), \ 7368309124Sdim (__mmask8)-1, (int)(R)); }) 7369309124Sdim 7370309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 7371309124Sdim_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 7372309124Sdim{ 7373309124Sdim return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, 7374309124Sdim (__v4sf) __B, 7375309124Sdim (__v4sf) __W, 7376309124Sdim (__mmask8) __U, 7377309124Sdim _MM_FROUND_CUR_DIRECTION); 7378309124Sdim} 7379309124Sdim 7380309124Sdim#define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \ 7381309124Sdim (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 7382309124Sdim (__v4sf)(__m128)(B), \ 7383309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 7384309124Sdim (int)(R)); }) 7385309124Sdim 7386309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 7387309124Sdim_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) 7388309124Sdim{ 7389309124Sdim return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, 7390309124Sdim (__v4sf) __B, 7391309124Sdim (__v4sf) _mm_setzero_ps (), 7392309124Sdim (__mmask8) __U, 7393309124Sdim _MM_FROUND_CUR_DIRECTION); 7394309124Sdim} 7395309124Sdim 7396309124Sdim#define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \ 7397309124Sdim 
(__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 7398309124Sdim (__v4sf)(__m128)(B), \ 7399309124Sdim (__v4sf)_mm_setzero_ps(), \ 7400309124Sdim (__mmask8)(U), (int)(R)); }) 7401309124Sdim 7402309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 7403321369Sdim_mm512_broadcast_f32x4(__m128 __A) 7404309124Sdim{ 7405321369Sdim return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 7406321369Sdim 0, 1, 2, 3, 0, 1, 2, 3, 7407321369Sdim 0, 1, 2, 3, 0, 1, 2, 3); 7408309124Sdim} 7409309124Sdim 7410309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 7411321369Sdim_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) 7412309124Sdim{ 7413321369Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, 7414321369Sdim (__v16sf)_mm512_broadcast_f32x4(__A), 7415321369Sdim (__v16sf)__O); 7416309124Sdim} 7417309124Sdim 7418309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 7419321369Sdim_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) 7420309124Sdim{ 7421321369Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, 7422321369Sdim (__v16sf)_mm512_broadcast_f32x4(__A), 7423321369Sdim (__v16sf)_mm512_setzero_ps()); 7424309124Sdim} 7425309124Sdim 7426309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 7427321369Sdim_mm512_broadcast_f64x4(__m256d __A) 7428309124Sdim{ 7429321369Sdim return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A, 7430321369Sdim 0, 1, 2, 3, 0, 1, 2, 3); 7431309124Sdim} 7432309124Sdim 7433309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 7434321369Sdim_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A) 7435309124Sdim{ 7436321369Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, 7437321369Sdim (__v8df)_mm512_broadcast_f64x4(__A), 7438321369Sdim (__v8df)__O); 7439309124Sdim} 7440309124Sdim 7441309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 7442321369Sdim_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A) 7443309124Sdim{ 7444321369Sdim return 
(__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, 7445321369Sdim (__v8df)_mm512_broadcast_f64x4(__A), 7446321369Sdim (__v8df)_mm512_setzero_pd()); 7447309124Sdim} 7448309124Sdim 7449309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7450321369Sdim_mm512_broadcast_i32x4(__m128i __A) 7451309124Sdim{ 7452321369Sdim return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 7453321369Sdim 0, 1, 2, 3, 0, 1, 2, 3, 7454321369Sdim 0, 1, 2, 3, 0, 1, 2, 3); 7455309124Sdim} 7456309124Sdim 7457309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7458321369Sdim_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) 7459309124Sdim{ 7460321369Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 7461321369Sdim (__v16si)_mm512_broadcast_i32x4(__A), 7462321369Sdim (__v16si)__O); 7463309124Sdim} 7464309124Sdim 7465309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7466321369Sdim_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) 7467309124Sdim{ 7468321369Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 7469321369Sdim (__v16si)_mm512_broadcast_i32x4(__A), 7470321369Sdim (__v16si)_mm512_setzero_si512()); 7471309124Sdim} 7472309124Sdim 7473309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7474321369Sdim_mm512_broadcast_i64x4(__m256i __A) 7475309124Sdim{ 7476321369Sdim return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A, 7477321369Sdim 0, 1, 2, 3, 0, 1, 2, 3); 7478309124Sdim} 7479309124Sdim 7480309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7481321369Sdim_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A) 7482309124Sdim{ 7483321369Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 7484321369Sdim (__v8di)_mm512_broadcast_i64x4(__A), 7485321369Sdim (__v8di)__O); 7486309124Sdim} 7487309124Sdim 7488309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7489321369Sdim_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A) 7490309124Sdim{ 7491321369Sdim return 
(__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 7492321369Sdim (__v8di)_mm512_broadcast_i64x4(__A), 7493321369Sdim (__v8di)_mm512_setzero_si512()); 7494309124Sdim} 7495309124Sdim 7496309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 7497309124Sdim_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) 7498309124Sdim{ 7499309124Sdim return (__m512d)__builtin_ia32_selectpd_512(__M, 7500309124Sdim (__v8df) _mm512_broadcastsd_pd(__A), 7501309124Sdim (__v8df) __O); 7502309124Sdim} 7503309124Sdim 7504309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 7505309124Sdim_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 7506309124Sdim{ 7507309124Sdim return (__m512d)__builtin_ia32_selectpd_512(__M, 7508309124Sdim (__v8df) _mm512_broadcastsd_pd(__A), 7509309124Sdim (__v8df) _mm512_setzero_pd()); 7510309124Sdim} 7511309124Sdim 7512309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 7513309124Sdim_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) 7514309124Sdim{ 7515309124Sdim return (__m512)__builtin_ia32_selectps_512(__M, 7516309124Sdim (__v16sf) _mm512_broadcastss_ps(__A), 7517309124Sdim (__v16sf) __O); 7518309124Sdim} 7519309124Sdim 7520309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 7521309124Sdim_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) 7522309124Sdim{ 7523309124Sdim return (__m512)__builtin_ia32_selectps_512(__M, 7524309124Sdim (__v16sf) _mm512_broadcastss_ps(__A), 7525309124Sdim (__v16sf) _mm512_setzero_ps()); 7526309124Sdim} 7527309124Sdim 7528309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7529309124Sdim_mm512_cvtsepi32_epi8 (__m512i __A) 7530309124Sdim{ 7531309124Sdim return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 7532309124Sdim (__v16qi) _mm_undefined_si128 (), 7533309124Sdim (__mmask16) -1); 7534309124Sdim} 7535309124Sdim 7536309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7537309124Sdim_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 7538309124Sdim{ 
7539309124Sdim return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 7540309124Sdim (__v16qi) __O, __M); 7541309124Sdim} 7542309124Sdim 7543309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7544309124Sdim_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A) 7545309124Sdim{ 7546309124Sdim return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 7547309124Sdim (__v16qi) _mm_setzero_si128 (), 7548309124Sdim __M); 7549309124Sdim} 7550309124Sdim 7551309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7552309124Sdim_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 7553309124Sdim{ 7554309124Sdim __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 7555309124Sdim} 7556309124Sdim 7557309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7558309124Sdim_mm512_cvtsepi32_epi16 (__m512i __A) 7559309124Sdim{ 7560309124Sdim return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 7561309124Sdim (__v16hi) _mm256_undefined_si256 (), 7562309124Sdim (__mmask16) -1); 7563309124Sdim} 7564309124Sdim 7565309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7566309124Sdim_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 7567309124Sdim{ 7568309124Sdim return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 7569309124Sdim (__v16hi) __O, __M); 7570309124Sdim} 7571309124Sdim 7572309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7573309124Sdim_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A) 7574309124Sdim{ 7575309124Sdim return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 7576309124Sdim (__v16hi) _mm256_setzero_si256 (), 7577309124Sdim __M); 7578309124Sdim} 7579309124Sdim 7580309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7581309124Sdim_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 7582309124Sdim{ 7583309124Sdim __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 7584309124Sdim} 7585309124Sdim 
7586309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7587309124Sdim_mm512_cvtsepi64_epi8 (__m512i __A) 7588309124Sdim{ 7589309124Sdim return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 7590309124Sdim (__v16qi) _mm_undefined_si128 (), 7591309124Sdim (__mmask8) -1); 7592309124Sdim} 7593309124Sdim 7594309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7595309124Sdim_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 7596309124Sdim{ 7597309124Sdim return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 7598309124Sdim (__v16qi) __O, __M); 7599309124Sdim} 7600309124Sdim 7601309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7602309124Sdim_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A) 7603309124Sdim{ 7604309124Sdim return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 7605309124Sdim (__v16qi) _mm_setzero_si128 (), 7606309124Sdim __M); 7607309124Sdim} 7608309124Sdim 7609309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7610309124Sdim_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 7611309124Sdim{ 7612309124Sdim __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 7613309124Sdim} 7614309124Sdim 7615309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7616309124Sdim_mm512_cvtsepi64_epi32 (__m512i __A) 7617309124Sdim{ 7618309124Sdim return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 7619309124Sdim (__v8si) _mm256_undefined_si256 (), 7620309124Sdim (__mmask8) -1); 7621309124Sdim} 7622309124Sdim 7623309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7624309124Sdim_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 7625309124Sdim{ 7626309124Sdim return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 7627309124Sdim (__v8si) __O, __M); 7628309124Sdim} 7629309124Sdim 7630309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7631309124Sdim_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A) 7632309124Sdim{ 7633309124Sdim 
return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 7634309124Sdim (__v8si) _mm256_setzero_si256 (), 7635309124Sdim __M); 7636309124Sdim} 7637309124Sdim 7638309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7639309124Sdim_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A) 7640309124Sdim{ 7641309124Sdim __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 7642309124Sdim} 7643309124Sdim 7644309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7645309124Sdim_mm512_cvtsepi64_epi16 (__m512i __A) 7646309124Sdim{ 7647309124Sdim return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 7648309124Sdim (__v8hi) _mm_undefined_si128 (), 7649309124Sdim (__mmask8) -1); 7650309124Sdim} 7651309124Sdim 7652309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7653309124Sdim_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 7654309124Sdim{ 7655309124Sdim return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 7656309124Sdim (__v8hi) __O, __M); 7657309124Sdim} 7658309124Sdim 7659309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7660309124Sdim_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A) 7661309124Sdim{ 7662309124Sdim return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 7663309124Sdim (__v8hi) _mm_setzero_si128 (), 7664309124Sdim __M); 7665309124Sdim} 7666309124Sdim 7667309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7668309124Sdim_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A) 7669309124Sdim{ 7670309124Sdim __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); 7671309124Sdim} 7672309124Sdim 7673309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7674309124Sdim_mm512_cvtusepi32_epi8 (__m512i __A) 7675309124Sdim{ 7676309124Sdim return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 7677309124Sdim (__v16qi) _mm_undefined_si128 (), 7678309124Sdim (__mmask16) -1); 7679309124Sdim} 7680309124Sdim 7681309124Sdimstatic 
__inline__ __m128i __DEFAULT_FN_ATTRS 7682309124Sdim_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 7683309124Sdim{ 7684309124Sdim return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 7685309124Sdim (__v16qi) __O, 7686309124Sdim __M); 7687309124Sdim} 7688309124Sdim 7689309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7690309124Sdim_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) 7691309124Sdim{ 7692309124Sdim return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 7693309124Sdim (__v16qi) _mm_setzero_si128 (), 7694309124Sdim __M); 7695309124Sdim} 7696309124Sdim 7697309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7698309124Sdim_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 7699309124Sdim{ 7700309124Sdim __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 7701309124Sdim} 7702309124Sdim 7703309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7704309124Sdim_mm512_cvtusepi32_epi16 (__m512i __A) 7705309124Sdim{ 7706309124Sdim return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 7707309124Sdim (__v16hi) _mm256_undefined_si256 (), 7708309124Sdim (__mmask16) -1); 7709309124Sdim} 7710309124Sdim 7711309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7712309124Sdim_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 7713309124Sdim{ 7714309124Sdim return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 7715309124Sdim (__v16hi) __O, 7716309124Sdim __M); 7717309124Sdim} 7718309124Sdim 7719309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7720309124Sdim_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) 7721309124Sdim{ 7722309124Sdim return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 7723309124Sdim (__v16hi) _mm256_setzero_si256 (), 7724309124Sdim __M); 7725309124Sdim} 7726309124Sdim 7727309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7728309124Sdim_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, 
__mmask16 __M, __m512i __A) 7729309124Sdim{ 7730309124Sdim __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 7731309124Sdim} 7732309124Sdim 7733309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7734309124Sdim_mm512_cvtusepi64_epi8 (__m512i __A) 7735309124Sdim{ 7736309124Sdim return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 7737309124Sdim (__v16qi) _mm_undefined_si128 (), 7738309124Sdim (__mmask8) -1); 7739309124Sdim} 7740309124Sdim 7741309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7742309124Sdim_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 7743309124Sdim{ 7744309124Sdim return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 7745309124Sdim (__v16qi) __O, 7746309124Sdim __M); 7747309124Sdim} 7748309124Sdim 7749309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7750309124Sdim_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A) 7751309124Sdim{ 7752309124Sdim return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 7753309124Sdim (__v16qi) _mm_setzero_si128 (), 7754309124Sdim __M); 7755309124Sdim} 7756309124Sdim 7757309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7758309124Sdim_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 7759309124Sdim{ 7760309124Sdim __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 7761309124Sdim} 7762309124Sdim 7763309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7764309124Sdim_mm512_cvtusepi64_epi32 (__m512i __A) 7765309124Sdim{ 7766309124Sdim return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 7767309124Sdim (__v8si) _mm256_undefined_si256 (), 7768309124Sdim (__mmask8) -1); 7769309124Sdim} 7770309124Sdim 7771309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7772309124Sdim_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 7773309124Sdim{ 7774309124Sdim return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 7775309124Sdim (__v8si) __O, __M); 
7776309124Sdim} 7777309124Sdim 7778309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7779309124Sdim_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A) 7780309124Sdim{ 7781309124Sdim return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 7782309124Sdim (__v8si) _mm256_setzero_si256 (), 7783309124Sdim __M); 7784309124Sdim} 7785309124Sdim 7786309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7787309124Sdim_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) 7788309124Sdim{ 7789309124Sdim __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M); 7790309124Sdim} 7791309124Sdim 7792309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7793309124Sdim_mm512_cvtusepi64_epi16 (__m512i __A) 7794309124Sdim{ 7795309124Sdim return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 7796309124Sdim (__v8hi) _mm_undefined_si128 (), 7797309124Sdim (__mmask8) -1); 7798309124Sdim} 7799309124Sdim 7800309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7801309124Sdim_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 7802309124Sdim{ 7803309124Sdim return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 7804309124Sdim (__v8hi) __O, __M); 7805309124Sdim} 7806309124Sdim 7807309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7808309124Sdim_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A) 7809309124Sdim{ 7810309124Sdim return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 7811309124Sdim (__v8hi) _mm_setzero_si128 (), 7812309124Sdim __M); 7813309124Sdim} 7814309124Sdim 7815309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7816309124Sdim_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) 7817309124Sdim{ 7818309124Sdim __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M); 7819309124Sdim} 7820309124Sdim 7821309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7822309124Sdim_mm512_cvtepi32_epi8 (__m512i __A) 7823309124Sdim{ 7824309124Sdim 
return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 7825309124Sdim (__v16qi) _mm_undefined_si128 (), 7826309124Sdim (__mmask16) -1); 7827309124Sdim} 7828309124Sdim 7829309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7830309124Sdim_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 7831309124Sdim{ 7832309124Sdim return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 7833309124Sdim (__v16qi) __O, __M); 7834309124Sdim} 7835309124Sdim 7836309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7837309124Sdim_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) 7838309124Sdim{ 7839309124Sdim return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 7840309124Sdim (__v16qi) _mm_setzero_si128 (), 7841309124Sdim __M); 7842309124Sdim} 7843309124Sdim 7844309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7845309124Sdim_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 7846309124Sdim{ 7847309124Sdim __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 7848309124Sdim} 7849309124Sdim 7850309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7851309124Sdim_mm512_cvtepi32_epi16 (__m512i __A) 7852309124Sdim{ 7853309124Sdim return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 7854309124Sdim (__v16hi) _mm256_undefined_si256 (), 7855309124Sdim (__mmask16) -1); 7856309124Sdim} 7857309124Sdim 7858309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7859309124Sdim_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 7860309124Sdim{ 7861309124Sdim return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 7862309124Sdim (__v16hi) __O, __M); 7863309124Sdim} 7864309124Sdim 7865309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7866309124Sdim_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A) 7867309124Sdim{ 7868309124Sdim return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 7869309124Sdim (__v16hi) _mm256_setzero_si256 (), 7870309124Sdim __M); 
7871309124Sdim} 7872309124Sdim 7873309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7874309124Sdim_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A) 7875309124Sdim{ 7876309124Sdim __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M); 7877309124Sdim} 7878309124Sdim 7879309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7880309124Sdim_mm512_cvtepi64_epi8 (__m512i __A) 7881309124Sdim{ 7882309124Sdim return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 7883309124Sdim (__v16qi) _mm_undefined_si128 (), 7884309124Sdim (__mmask8) -1); 7885309124Sdim} 7886309124Sdim 7887309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7888309124Sdim_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 7889309124Sdim{ 7890309124Sdim return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 7891309124Sdim (__v16qi) __O, __M); 7892309124Sdim} 7893309124Sdim 7894309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7895309124Sdim_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A) 7896309124Sdim{ 7897309124Sdim return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 7898309124Sdim (__v16qi) _mm_setzero_si128 (), 7899309124Sdim __M); 7900309124Sdim} 7901309124Sdim 7902309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7903309124Sdim_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 7904309124Sdim{ 7905309124Sdim __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 7906309124Sdim} 7907309124Sdim 7908309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7909309124Sdim_mm512_cvtepi64_epi32 (__m512i __A) 7910309124Sdim{ 7911309124Sdim return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 7912309124Sdim (__v8si) _mm256_undefined_si256 (), 7913309124Sdim (__mmask8) -1); 7914309124Sdim} 7915309124Sdim 7916309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7917309124Sdim_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 7918309124Sdim{ 
7919309124Sdim return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 7920309124Sdim (__v8si) __O, __M); 7921309124Sdim} 7922309124Sdim 7923309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7924309124Sdim_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A) 7925309124Sdim{ 7926309124Sdim return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 7927309124Sdim (__v8si) _mm256_setzero_si256 (), 7928309124Sdim __M); 7929309124Sdim} 7930309124Sdim 7931309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7932309124Sdim_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) 7933309124Sdim{ 7934309124Sdim __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 7935309124Sdim} 7936309124Sdim 7937309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7938309124Sdim_mm512_cvtepi64_epi16 (__m512i __A) 7939309124Sdim{ 7940309124Sdim return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 7941309124Sdim (__v8hi) _mm_undefined_si128 (), 7942309124Sdim (__mmask8) -1); 7943309124Sdim} 7944309124Sdim 7945309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7946309124Sdim_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 7947309124Sdim{ 7948309124Sdim return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 7949309124Sdim (__v8hi) __O, __M); 7950309124Sdim} 7951309124Sdim 7952309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7953309124Sdim_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A) 7954309124Sdim{ 7955309124Sdim return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 7956309124Sdim (__v8hi) _mm_setzero_si128 (), 7957309124Sdim __M); 7958309124Sdim} 7959309124Sdim 7960309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7961309124Sdim_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) 7962309124Sdim{ 7963309124Sdim __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); 7964309124Sdim} 7965309124Sdim 7966314564Sdim#define _mm512_extracti32x4_epi32(A, 
imm) __extension__ ({ \ 7967314564Sdim (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A), \ 7968314564Sdim (__v16si)_mm512_undefined_epi32(), \ 7969314564Sdim 0 + ((imm) & 0x3) * 4, \ 7970314564Sdim 1 + ((imm) & 0x3) * 4, \ 7971314564Sdim 2 + ((imm) & 0x3) * 4, \ 7972314564Sdim 3 + ((imm) & 0x3) * 4); }) 7973309124Sdim 7974309124Sdim#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \ 7975321369Sdim (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 7976314564Sdim (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \ 7977321369Sdim (__v4si)(W)); }) 7978309124Sdim 7979309124Sdim#define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \ 7980321369Sdim (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 7981314564Sdim (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \ 7982314564Sdim (__v4si)_mm_setzero_si128()); }) 7983309124Sdim 7984314564Sdim#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \ 7985314564Sdim (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A), \ 7986314564Sdim (__v8di)_mm512_undefined_epi32(), \ 7987314564Sdim ((imm) & 1) ? 4 : 0, \ 7988314564Sdim ((imm) & 1) ? 5 : 1, \ 7989314564Sdim ((imm) & 1) ? 6 : 2, \ 7990314564Sdim ((imm) & 1) ? 
7 : 3); })

/* Masked form: selects, under mask U, between the extracted 256-bit result
   and W via __builtin_ia32_selectq_256.  */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
      (__v4di)(W)); })

/* maskz form: same select, with an all-zero vector in place of W.  */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
      (__v4di)_mm256_setzero_si256()); })

/* Shuffle of A with B widened by _mm512_castpd256_pd512.  Bit 0 of imm picks
   the index set: clear gives 8,9,10,11,4,5,6,7; set gives
   0,1,2,3,8,9,10,11.  */
#define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \
      ((imm) & 0x1) ?  0 :  8, \
      ((imm) & 0x1) ?  1 :  9, \
      ((imm) & 0x1) ?  2 : 10, \
      ((imm) & 0x1) ?  3 : 11, \
      ((imm) & 0x1) ?  8 :  4, \
      ((imm) & 0x1) ?  9 :  5, \
      ((imm) & 0x1) ? 10 :  6, \
      ((imm) & 0x1) ? 11 :  7); })

/* Masked form: selects, under mask U, between the insert result and W via
   __builtin_ia32_selectpd_512.  */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
      (__v8df)(W)); })

/* maskz form: same select, with _mm512_setzero_pd() in place of W.  */
#define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
      (__v8df)_mm512_setzero_pd()); })

#define _mm512_inserti64x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                 (__v8di)_mm512_castsi256_si512((__m256i)(B)), \
                                 ((imm) & 0x1) ?  0 :  8, \
                                 ((imm) & 0x1) ?  1 :  9, \
                                 ((imm) & 0x1) ?
2 : 10, \ 8030314564Sdim ((imm) & 0x1) ? 3 : 11, \ 8031314564Sdim ((imm) & 0x1) ? 8 : 4, \ 8032314564Sdim ((imm) & 0x1) ? 9 : 5, \ 8033314564Sdim ((imm) & 0x1) ? 10 : 6, \ 8034314564Sdim ((imm) & 0x1) ? 11 : 7); }) 8035309124Sdim 8036309124Sdim#define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \ 8037314564Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 8038314564Sdim (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ 8039314564Sdim (__v8di)(W)); }) 8040309124Sdim 8041309124Sdim#define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \ 8042314564Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 8043314564Sdim (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ 8044314564Sdim (__v8di)_mm512_setzero_si512()); }) 8045309124Sdim 8046309124Sdim#define _mm512_insertf32x4(A, B, imm) __extension__ ({ \ 8047314564Sdim (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \ 8048314564Sdim (__v16sf)_mm512_castps128_ps512((__m128)(B)),\ 8049314564Sdim (((imm) & 0x3) == 0) ? 16 : 0, \ 8050314564Sdim (((imm) & 0x3) == 0) ? 17 : 1, \ 8051314564Sdim (((imm) & 0x3) == 0) ? 18 : 2, \ 8052314564Sdim (((imm) & 0x3) == 0) ? 19 : 3, \ 8053314564Sdim (((imm) & 0x3) == 1) ? 16 : 4, \ 8054314564Sdim (((imm) & 0x3) == 1) ? 17 : 5, \ 8055314564Sdim (((imm) & 0x3) == 1) ? 18 : 6, \ 8056314564Sdim (((imm) & 0x3) == 1) ? 19 : 7, \ 8057314564Sdim (((imm) & 0x3) == 2) ? 16 : 8, \ 8058314564Sdim (((imm) & 0x3) == 2) ? 17 : 9, \ 8059314564Sdim (((imm) & 0x3) == 2) ? 18 : 10, \ 8060314564Sdim (((imm) & 0x3) == 2) ? 19 : 11, \ 8061314564Sdim (((imm) & 0x3) == 3) ? 16 : 12, \ 8062314564Sdim (((imm) & 0x3) == 3) ? 17 : 13, \ 8063314564Sdim (((imm) & 0x3) == 3) ? 18 : 14, \ 8064314564Sdim (((imm) & 0x3) == 3) ? 
19 : 15); })

/* Masked form: selects, under mask U, between the insert result and W via
   __builtin_ia32_selectps_512.  */
#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
      (__v16sf)(W)); })

/* maskz form: same select, with _mm512_setzero_ps() in place of W.  */
#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
      (__v16sf)_mm512_setzero_ps()); })

#define _mm512_inserti32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                 (__v16si)_mm512_castsi128_si512((__m128i)(B)),\
                                 (((imm) & 0x3) == 0) ? 16 :  0, \
                                 (((imm) & 0x3) == 0) ? 17 :  1, \
                                 (((imm) & 0x3) == 0) ? 18 :  2, \
                                 (((imm) & 0x3) == 0) ? 19 :  3, \
                                 (((imm) & 0x3) == 1) ? 16 :  4, \
                                 (((imm) & 0x3) == 1) ? 17 :  5, \
                                 (((imm) & 0x3) == 1) ? 18 :  6, \
                                 (((imm) & 0x3) == 1) ? 19 :  7, \
                                 (((imm) & 0x3) == 2) ? 16 :  8, \
                                 (((imm) & 0x3) == 2) ? 17 :  9, \
                                 (((imm) & 0x3) == 2) ? 18 : 10, \
                                 (((imm) & 0x3) == 2) ? 19 : 11, \
                                 (((imm) & 0x3) == 3) ? 16 : 12, \
                                 (((imm) & 0x3) == 3) ? 17 : 13, \
                                 (((imm) & 0x3) == 3) ? 18 : 14, \
                                 (((imm) & 0x3) == 3) ?
19 : 15); })

/* Masked form: selects, under mask U, between the insert result and W via
   __builtin_ia32_selectd_512.  */
#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
      (__v16si)(W)); })

/* maskz form: same select, with _mm512_setzero_si512() in place of W.  */
#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()); })

/* getmant, explicit rounding argument R.  B and C are packed into a single
   int operand as ((C) << 2) | (B).  Unmasked: undefined third operand,
   all-ones mask.  */
#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, (int)(R)); })

/* Masked form: W and U are forwarded as operands three and four.  */
#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), (int)(R)); })

/* maskz form: operand three is _mm512_setzero_pd(), operand four is U.  */
#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), (int)(R)); })

/* Same call with _MM_FROUND_CUR_DIRECTION as the last operand.
   NOTE(review): operand three here is _mm512_setzero_pd(), whereas
   _mm512_getmant_round_pd above passes _mm512_undefined_pd(); kept as is.  */
#define _mm512_getmant_pd(A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
(int)(((C)<<2) | (B)), \ 8134309124Sdim (__v8df)(__m512d)(W), \ 8135309124Sdim (__mmask8)(U), \ 8136309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8137309124Sdim 8138309124Sdim#define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \ 8139309124Sdim (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 8140309124Sdim (int)(((C)<<2) | (B)), \ 8141309124Sdim (__v8df)_mm512_setzero_pd(), \ 8142309124Sdim (__mmask8)(U), \ 8143309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8144309124Sdim 8145309124Sdim#define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \ 8146309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 8147309124Sdim (int)(((C)<<2) | (B)), \ 8148309124Sdim (__v16sf)_mm512_undefined_ps(), \ 8149309124Sdim (__mmask16)-1, (int)(R)); }) 8150309124Sdim 8151309124Sdim#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \ 8152309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 8153309124Sdim (int)(((C)<<2) | (B)), \ 8154309124Sdim (__v16sf)(__m512)(W), \ 8155309124Sdim (__mmask16)(U), (int)(R)); }) 8156309124Sdim 8157309124Sdim#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \ 8158309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 8159309124Sdim (int)(((C)<<2) | (B)), \ 8160309124Sdim (__v16sf)_mm512_setzero_ps(), \ 8161309124Sdim (__mmask16)(U), (int)(R)); }) 8162309124Sdim 8163309124Sdim#define _mm512_getmant_ps(A, B, C) __extension__ ({ \ 8164309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 8165309124Sdim (int)(((C)<<2)|(B)), \ 8166309124Sdim (__v16sf)_mm512_undefined_ps(), \ 8167309124Sdim (__mmask16)-1, \ 8168309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8169309124Sdim 8170309124Sdim#define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ 8171309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 8172309124Sdim (int)(((C)<<2)|(B)), \ 8173309124Sdim (__v16sf)(__m512)(W), \ 8174309124Sdim 
(__mmask16)(U), \ 8175309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8176309124Sdim 8177309124Sdim#define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ 8178309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 8179309124Sdim (int)(((C)<<2)|(B)), \ 8180309124Sdim (__v16sf)_mm512_setzero_ps(), \ 8181309124Sdim (__mmask16)(U), \ 8182309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8183309124Sdim 8184309124Sdim#define _mm512_getexp_round_pd(A, R) __extension__ ({ \ 8185309124Sdim (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 8186309124Sdim (__v8df)_mm512_undefined_pd(), \ 8187309124Sdim (__mmask8)-1, (int)(R)); }) 8188309124Sdim 8189309124Sdim#define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \ 8190309124Sdim (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 8191309124Sdim (__v8df)(__m512d)(W), \ 8192309124Sdim (__mmask8)(U), (int)(R)); }) 8193309124Sdim 8194309124Sdim#define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \ 8195309124Sdim (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 8196309124Sdim (__v8df)_mm512_setzero_pd(), \ 8197309124Sdim (__mmask8)(U), (int)(R)); }) 8198309124Sdim 8199309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 8200309124Sdim_mm512_getexp_pd (__m512d __A) 8201309124Sdim{ 8202309124Sdim return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8203309124Sdim (__v8df) _mm512_undefined_pd (), 8204309124Sdim (__mmask8) -1, 8205309124Sdim _MM_FROUND_CUR_DIRECTION); 8206309124Sdim} 8207309124Sdim 8208309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 8209309124Sdim_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A) 8210309124Sdim{ 8211309124Sdim return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8212309124Sdim (__v8df) __W, 8213309124Sdim (__mmask8) __U, 8214309124Sdim _MM_FROUND_CUR_DIRECTION); 8215309124Sdim} 8216309124Sdim 8217309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 
8218309124Sdim_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) 8219309124Sdim{ 8220309124Sdim return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8221309124Sdim (__v8df) _mm512_setzero_pd (), 8222309124Sdim (__mmask8) __U, 8223309124Sdim _MM_FROUND_CUR_DIRECTION); 8224309124Sdim} 8225309124Sdim 8226309124Sdim#define _mm512_getexp_round_ps(A, R) __extension__ ({ \ 8227309124Sdim (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8228309124Sdim (__v16sf)_mm512_undefined_ps(), \ 8229309124Sdim (__mmask16)-1, (int)(R)); }) 8230309124Sdim 8231309124Sdim#define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \ 8232309124Sdim (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8233309124Sdim (__v16sf)(__m512)(W), \ 8234309124Sdim (__mmask16)(U), (int)(R)); }) 8235309124Sdim 8236309124Sdim#define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \ 8237309124Sdim (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8238309124Sdim (__v16sf)_mm512_setzero_ps(), \ 8239309124Sdim (__mmask16)(U), (int)(R)); }) 8240309124Sdim 8241309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 8242309124Sdim_mm512_getexp_ps (__m512 __A) 8243309124Sdim{ 8244309124Sdim return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8245309124Sdim (__v16sf) _mm512_undefined_ps (), 8246309124Sdim (__mmask16) -1, 8247309124Sdim _MM_FROUND_CUR_DIRECTION); 8248309124Sdim} 8249309124Sdim 8250309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 8251309124Sdim_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A) 8252309124Sdim{ 8253309124Sdim return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8254309124Sdim (__v16sf) __W, 8255309124Sdim (__mmask16) __U, 8256309124Sdim _MM_FROUND_CUR_DIRECTION); 8257309124Sdim} 8258309124Sdim 8259309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 8260309124Sdim_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) 8261309124Sdim{ 8262309124Sdim return (__m512) 
__builtin_ia32_getexpps512_mask ((__v16sf) __A, 8263309124Sdim (__v16sf) _mm512_setzero_ps (), 8264309124Sdim (__mmask16) __U, 8265309124Sdim _MM_FROUND_CUR_DIRECTION); 8266309124Sdim} 8267309124Sdim 8268309124Sdim#define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \ 8269309124Sdim (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \ 8270309124Sdim (float const *)(addr), \ 8271309124Sdim (__v8di)(__m512i)(index), (__mmask8)-1, \ 8272309124Sdim (int)(scale)); }) 8273309124Sdim 8274321369Sdim#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__({\ 8275321369Sdim (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\ 8276321369Sdim (float const *)(addr), \ 8277321369Sdim (__v8di)(__m512i)(index), \ 8278321369Sdim (__mmask8)(mask), (int)(scale)); }) 8279309124Sdim 8280309124Sdim#define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\ 8281309124Sdim (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_ps(), \ 8282309124Sdim (int const *)(addr), \ 8283309124Sdim (__v8di)(__m512i)(index), \ 8284309124Sdim (__mmask8)-1, (int)(scale)); }) 8285309124Sdim 8286309124Sdim#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8287309124Sdim (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \ 8288309124Sdim (int const *)(addr), \ 8289309124Sdim (__v8di)(__m512i)(index), \ 8290309124Sdim (__mmask8)(mask), (int)(scale)); }) 8291309124Sdim 8292309124Sdim#define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\ 8293309124Sdim (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \ 8294309124Sdim (double const *)(addr), \ 8295309124Sdim (__v8di)(__m512i)(index), (__mmask8)-1, \ 8296309124Sdim (int)(scale)); }) 8297309124Sdim 8298309124Sdim#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8299309124Sdim (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \ 8300309124Sdim (double const 
*)(addr), \ 8301309124Sdim (__v8di)(__m512i)(index), \ 8302309124Sdim (__mmask8)(mask), (int)(scale)); }) 8303309124Sdim 8304309124Sdim#define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\ 8305309124Sdim (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_pd(), \ 8306309124Sdim (long long const *)(addr), \ 8307309124Sdim (__v8di)(__m512i)(index), (__mmask8)-1, \ 8308309124Sdim (int)(scale)); }) 8309309124Sdim 8310309124Sdim#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8311309124Sdim (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \ 8312309124Sdim (long long const *)(addr), \ 8313309124Sdim (__v8di)(__m512i)(index), \ 8314309124Sdim (__mmask8)(mask), (int)(scale)); }) 8315309124Sdim 8316309124Sdim#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\ 8317309124Sdim (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \ 8318309124Sdim (float const *)(addr), \ 8319309124Sdim (__v16sf)(__m512)(index), \ 8320309124Sdim (__mmask16)-1, (int)(scale)); }) 8321309124Sdim 8322309124Sdim#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8323309124Sdim (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \ 8324309124Sdim (float const *)(addr), \ 8325309124Sdim (__v16sf)(__m512)(index), \ 8326309124Sdim (__mmask16)(mask), (int)(scale)); }) 8327309124Sdim 8328309124Sdim#define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\ 8329309124Sdim (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \ 8330309124Sdim (int const *)(addr), \ 8331309124Sdim (__v16si)(__m512i)(index), \ 8332309124Sdim (__mmask16)-1, (int)(scale)); }) 8333309124Sdim 8334309124Sdim#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8335309124Sdim (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \ 8336309124Sdim (int const *)(addr), \ 8337309124Sdim (__v16si)(__m512i)(index), 
\ 8338309124Sdim (__mmask16)(mask), (int)(scale)); }) 8339309124Sdim 8340309124Sdim#define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\ 8341309124Sdim (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \ 8342309124Sdim (double const *)(addr), \ 8343309124Sdim (__v8si)(__m256i)(index), (__mmask8)-1, \ 8344309124Sdim (int)(scale)); }) 8345309124Sdim 8346309124Sdim#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8347309124Sdim (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \ 8348309124Sdim (double const *)(addr), \ 8349309124Sdim (__v8si)(__m256i)(index), \ 8350309124Sdim (__mmask8)(mask), (int)(scale)); }) 8351309124Sdim 8352309124Sdim#define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\ 8353309124Sdim (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \ 8354309124Sdim (long long const *)(addr), \ 8355309124Sdim (__v8si)(__m256i)(index), (__mmask8)-1, \ 8356309124Sdim (int)(scale)); }) 8357309124Sdim 8358309124Sdim#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8359309124Sdim (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \ 8360309124Sdim (long long const *)(addr), \ 8361309124Sdim (__v8si)(__m256i)(index), \ 8362309124Sdim (__mmask8)(mask), (int)(scale)); }) 8363309124Sdim 8364309124Sdim#define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\ 8365309124Sdim __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \ 8366309124Sdim (__v8di)(__m512i)(index), \ 8367309124Sdim (__v8sf)(__m256)(v1), (int)(scale)); }) 8368309124Sdim 8369309124Sdim#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\ 8370309124Sdim __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \ 8371309124Sdim (__v8di)(__m512i)(index), \ 8372309124Sdim (__v8sf)(__m256)(v1), (int)(scale)); }) 8373309124Sdim 8374309124Sdim#define _mm512_i64scatter_epi32(addr, index, v1, 
scale) __extension__ ({\ 8375309124Sdim __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \ 8376309124Sdim (__v8di)(__m512i)(index), \ 8377309124Sdim (__v8si)(__m256i)(v1), (int)(scale)); }) 8378309124Sdim 8379309124Sdim#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\ 8380309124Sdim __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \ 8381309124Sdim (__v8di)(__m512i)(index), \ 8382309124Sdim (__v8si)(__m256i)(v1), (int)(scale)); }) 8383309124Sdim 8384309124Sdim#define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\ 8385309124Sdim __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \ 8386309124Sdim (__v8di)(__m512i)(index), \ 8387309124Sdim (__v8df)(__m512d)(v1), (int)(scale)); }) 8388309124Sdim 8389309124Sdim#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\ 8390309124Sdim __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \ 8391309124Sdim (__v8di)(__m512i)(index), \ 8392309124Sdim (__v8df)(__m512d)(v1), (int)(scale)); }) 8393309124Sdim 8394309124Sdim#define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\ 8395309124Sdim __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \ 8396309124Sdim (__v8di)(__m512i)(index), \ 8397309124Sdim (__v8di)(__m512i)(v1), (int)(scale)); }) 8398309124Sdim 8399309124Sdim#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\ 8400309124Sdim __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \ 8401309124Sdim (__v8di)(__m512i)(index), \ 8402309124Sdim (__v8di)(__m512i)(v1), (int)(scale)); }) 8403309124Sdim 8404309124Sdim#define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\ 8405309124Sdim __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \ 8406309124Sdim (__v16si)(__m512i)(index), \ 8407309124Sdim (__v16sf)(__m512)(v1), (int)(scale)); }) 8408309124Sdim 8409309124Sdim#define _mm512_mask_i32scatter_ps(addr, mask, 
index, v1, scale) __extension__ ({\ 8410309124Sdim __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \ 8411309124Sdim (__v16si)(__m512i)(index), \ 8412309124Sdim (__v16sf)(__m512)(v1), (int)(scale)); }) 8413309124Sdim 8414309124Sdim#define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\ 8415309124Sdim __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \ 8416309124Sdim (__v16si)(__m512i)(index), \ 8417309124Sdim (__v16si)(__m512i)(v1), (int)(scale)); }) 8418309124Sdim 8419309124Sdim#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\ 8420309124Sdim __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \ 8421309124Sdim (__v16si)(__m512i)(index), \ 8422309124Sdim (__v16si)(__m512i)(v1), (int)(scale)); }) 8423309124Sdim 8424309124Sdim#define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\ 8425309124Sdim __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \ 8426309124Sdim (__v8si)(__m256i)(index), \ 8427309124Sdim (__v8df)(__m512d)(v1), (int)(scale)); }) 8428309124Sdim 8429309124Sdim#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\ 8430309124Sdim __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \ 8431309124Sdim (__v8si)(__m256i)(index), \ 8432309124Sdim (__v8df)(__m512d)(v1), (int)(scale)); }) 8433309124Sdim 8434309124Sdim#define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\ 8435309124Sdim __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \ 8436309124Sdim (__v8si)(__m256i)(index), \ 8437309124Sdim (__v8di)(__m512i)(v1), (int)(scale)); }) 8438309124Sdim 8439309124Sdim#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\ 8440309124Sdim __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \ 8441309124Sdim (__v8si)(__m256i)(index), \ 8442309124Sdim (__v8di)(__m512i)(v1), (int)(scale)); }) 8443309124Sdim 8444309124Sdimstatic __inline__ __m128 
__DEFAULT_FN_ATTRS 8445309124Sdim_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8446309124Sdim{ 8447314564Sdim return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 8448314564Sdim (__v4sf) __A, 8449309124Sdim (__v4sf) __B, 8450309124Sdim (__mmask8) __U, 8451309124Sdim _MM_FROUND_CUR_DIRECTION); 8452309124Sdim} 8453309124Sdim 8454309124Sdim#define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\ 8455314564Sdim (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 8456314564Sdim (__v4sf)(__m128)(A), \ 8457314564Sdim (__v4sf)(__m128)(B), (__mmask8)(U), \ 8458309124Sdim (int)(R)); }) 8459309124Sdim 8460309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8461309124Sdim_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 8462309124Sdim{ 8463309124Sdim return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A, 8464309124Sdim (__v4sf) __B, 8465309124Sdim (__v4sf) __C, 8466309124Sdim (__mmask8) __U, 8467309124Sdim _MM_FROUND_CUR_DIRECTION); 8468309124Sdim} 8469309124Sdim 8470309124Sdim#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\ 8471309124Sdim (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 8472309124Sdim (__v4sf)(__m128)(B), \ 8473309124Sdim (__v4sf)(__m128)(C), (__mmask8)(U), \ 8474309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8475309124Sdim 8476309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8477309124Sdim_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 8478309124Sdim{ 8479309124Sdim return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, 8480309124Sdim (__v4sf) __X, 8481309124Sdim (__v4sf) __Y, 8482309124Sdim (__mmask8) __U, 8483309124Sdim _MM_FROUND_CUR_DIRECTION); 8484309124Sdim} 8485309124Sdim 8486309124Sdim#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\ 8487309124Sdim (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ 8488309124Sdim (__v4sf)(__m128)(X), \ 8489309124Sdim (__v4sf)(__m128)(Y), (__mmask8)(U), \ 
8490309124Sdim (int)(R)); }) 8491309124Sdim 8492309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8493309124Sdim_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8494309124Sdim{ 8495314564Sdim return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 8496314564Sdim (__v4sf) __A, 8497309124Sdim -(__v4sf) __B, 8498309124Sdim (__mmask8) __U, 8499309124Sdim _MM_FROUND_CUR_DIRECTION); 8500309124Sdim} 8501309124Sdim 8502309124Sdim#define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\ 8503314564Sdim (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 8504314564Sdim (__v4sf)(__m128)(A), \ 8505314564Sdim (__v4sf)(__m128)(B), (__mmask8)(U), \ 8506309124Sdim (int)(R)); }) 8507309124Sdim 8508309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8509309124Sdim_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 8510309124Sdim{ 8511309124Sdim return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A, 8512309124Sdim (__v4sf) __B, 8513309124Sdim -(__v4sf) __C, 8514309124Sdim (__mmask8) __U, 8515309124Sdim _MM_FROUND_CUR_DIRECTION); 8516309124Sdim} 8517309124Sdim 8518309124Sdim#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\ 8519309124Sdim (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 8520309124Sdim (__v4sf)(__m128)(B), \ 8521309124Sdim -(__v4sf)(__m128)(C), (__mmask8)(U), \ 8522309124Sdim (int)(R)); }) 8523309124Sdim 8524309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8525309124Sdim_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 8526309124Sdim{ 8527314564Sdim return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W, 8528309124Sdim (__v4sf) __X, 8529314564Sdim (__v4sf) __Y, 8530309124Sdim (__mmask8) __U, 8531309124Sdim _MM_FROUND_CUR_DIRECTION); 8532309124Sdim} 8533309124Sdim 8534309124Sdim#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\ 8535314564Sdim (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ 8536309124Sdim 
(__v4sf)(__m128)(X), \ 8537314564Sdim (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8538309124Sdim (int)(R)); }) 8539309124Sdim 8540309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8541309124Sdim_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8542309124Sdim{ 8543314564Sdim return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 8544314564Sdim -(__v4sf) __A, 8545309124Sdim (__v4sf) __B, 8546309124Sdim (__mmask8) __U, 8547309124Sdim _MM_FROUND_CUR_DIRECTION); 8548309124Sdim} 8549309124Sdim 8550309124Sdim#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\ 8551314564Sdim (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 8552314564Sdim -(__v4sf)(__m128)(A), \ 8553314564Sdim (__v4sf)(__m128)(B), (__mmask8)(U), \ 8554309124Sdim (int)(R)); }) 8555309124Sdim 8556309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8557309124Sdim_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 8558309124Sdim{ 8559309124Sdim return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A, 8560309124Sdim (__v4sf) __B, 8561309124Sdim (__v4sf) __C, 8562309124Sdim (__mmask8) __U, 8563309124Sdim _MM_FROUND_CUR_DIRECTION); 8564309124Sdim} 8565309124Sdim 8566309124Sdim#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\ 8567309124Sdim (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \ 8568309124Sdim (__v4sf)(__m128)(B), \ 8569309124Sdim (__v4sf)(__m128)(C), (__mmask8)(U), \ 8570309124Sdim (int)(R)); }) 8571309124Sdim 8572309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8573309124Sdim_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 8574309124Sdim{ 8575309124Sdim return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W, 8576309124Sdim (__v4sf) __X, 8577309124Sdim (__v4sf) __Y, 8578309124Sdim (__mmask8) __U, 8579309124Sdim _MM_FROUND_CUR_DIRECTION); 8580309124Sdim} 8581309124Sdim 8582309124Sdim#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\ 8583309124Sdim 
(__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \ 8584309124Sdim (__v4sf)(__m128)(X), \ 8585309124Sdim (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8586309124Sdim (int)(R)); }) 8587309124Sdim 8588309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8589309124Sdim_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8590309124Sdim{ 8591314564Sdim return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 8592314564Sdim -(__v4sf) __A, 8593309124Sdim -(__v4sf) __B, 8594309124Sdim (__mmask8) __U, 8595309124Sdim _MM_FROUND_CUR_DIRECTION); 8596309124Sdim} 8597309124Sdim 8598309124Sdim#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\ 8599314564Sdim (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 8600314564Sdim -(__v4sf)(__m128)(A), \ 8601314564Sdim -(__v4sf)(__m128)(B), (__mmask8)(U), \ 8602309124Sdim (int)(R)); }) 8603309124Sdim 8604309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8605309124Sdim_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 8606309124Sdim{ 8607309124Sdim return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A, 8608309124Sdim (__v4sf) __B, 8609309124Sdim -(__v4sf) __C, 8610309124Sdim (__mmask8) __U, 8611309124Sdim _MM_FROUND_CUR_DIRECTION); 8612309124Sdim} 8613309124Sdim 8614309124Sdim#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\ 8615309124Sdim (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \ 8616309124Sdim (__v4sf)(__m128)(B), \ 8617309124Sdim -(__v4sf)(__m128)(C), (__mmask8)(U), \ 8618309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8619309124Sdim 8620309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8621309124Sdim_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 8622309124Sdim{ 8623314564Sdim return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W, 8624309124Sdim (__v4sf) __X, 8625314564Sdim (__v4sf) __Y, 8626309124Sdim (__mmask8) __U, 8627309124Sdim _MM_FROUND_CUR_DIRECTION); 8628309124Sdim} 8629309124Sdim 
8630309124Sdim#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\ 8631314564Sdim (__m128)__builtin_ia32_vfnmsubss3_mask3((__v4sf)(__m128)(W), \ 8632309124Sdim (__v4sf)(__m128)(X), \ 8633314564Sdim (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8634309124Sdim (int)(R)); }) 8635309124Sdim 8636309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8637309124Sdim_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8638309124Sdim{ 8639314564Sdim return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W, 8640314564Sdim (__v2df) __A, 8641309124Sdim (__v2df) __B, 8642309124Sdim (__mmask8) __U, 8643309124Sdim _MM_FROUND_CUR_DIRECTION); 8644309124Sdim} 8645309124Sdim 8646309124Sdim#define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\ 8647314564Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8648314564Sdim (__v2df)(__m128d)(A), \ 8649314564Sdim (__v2df)(__m128d)(B), (__mmask8)(U), \ 8650309124Sdim (int)(R)); }) 8651309124Sdim 8652309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8653309124Sdim_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8654309124Sdim{ 8655309124Sdim return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A, 8656309124Sdim (__v2df) __B, 8657309124Sdim (__v2df) __C, 8658309124Sdim (__mmask8) __U, 8659309124Sdim _MM_FROUND_CUR_DIRECTION); 8660309124Sdim} 8661309124Sdim 8662309124Sdim#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\ 8663309124Sdim (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8664309124Sdim (__v2df)(__m128d)(B), \ 8665309124Sdim (__v2df)(__m128d)(C), (__mmask8)(U), \ 8666309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8667309124Sdim 8668309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8669309124Sdim_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8670309124Sdim{ 8671309124Sdim return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, 8672309124Sdim (__v2df) __X, 8673309124Sdim (__v2df) __Y, 
8674309124Sdim (__mmask8) __U, 8675309124Sdim _MM_FROUND_CUR_DIRECTION); 8676309124Sdim} 8677309124Sdim 8678309124Sdim#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\ 8679309124Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ 8680309124Sdim (__v2df)(__m128d)(X), \ 8681309124Sdim (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8682309124Sdim (int)(R)); }) 8683309124Sdim 8684309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8685309124Sdim_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8686309124Sdim{ 8687314564Sdim return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W, 8688314564Sdim (__v2df) __A, 8689309124Sdim -(__v2df) __B, 8690309124Sdim (__mmask8) __U, 8691309124Sdim _MM_FROUND_CUR_DIRECTION); 8692309124Sdim} 8693309124Sdim 8694309124Sdim#define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\ 8695314564Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8696314564Sdim (__v2df)(__m128d)(A), \ 8697314564Sdim -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8698309124Sdim (int)(R)); }) 8699309124Sdim 8700309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8701309124Sdim_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8702309124Sdim{ 8703309124Sdim return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A, 8704309124Sdim (__v2df) __B, 8705309124Sdim -(__v2df) __C, 8706309124Sdim (__mmask8) __U, 8707309124Sdim _MM_FROUND_CUR_DIRECTION); 8708309124Sdim} 8709309124Sdim 8710309124Sdim#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\ 8711309124Sdim (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8712309124Sdim (__v2df)(__m128d)(B), \ 8713309124Sdim -(__v2df)(__m128d)(C), \ 8714309124Sdim (__mmask8)(U), (int)(R)); }) 8715309124Sdim 8716309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8717309124Sdim_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8718309124Sdim{ 8719314564Sdim return (__m128d) 
__builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W, 8720309124Sdim (__v2df) __X, 8721314564Sdim (__v2df) __Y, 8722309124Sdim (__mmask8) __U, 8723309124Sdim _MM_FROUND_CUR_DIRECTION); 8724309124Sdim} 8725309124Sdim 8726309124Sdim#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\ 8727314564Sdim (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ 8728309124Sdim (__v2df)(__m128d)(X), \ 8729314564Sdim (__v2df)(__m128d)(Y), \ 8730309124Sdim (__mmask8)(U), (int)(R)); }) 8731309124Sdim 8732309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8733309124Sdim_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8734309124Sdim{ 8735314564Sdim return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W, 8736314564Sdim -(__v2df) __A, 8737309124Sdim (__v2df) __B, 8738309124Sdim (__mmask8) __U, 8739309124Sdim _MM_FROUND_CUR_DIRECTION); 8740309124Sdim} 8741309124Sdim 8742309124Sdim#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\ 8743314564Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8744314564Sdim -(__v2df)(__m128d)(A), \ 8745314564Sdim (__v2df)(__m128d)(B), (__mmask8)(U), \ 8746309124Sdim (int)(R)); }) 8747309124Sdim 8748309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8749309124Sdim_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8750309124Sdim{ 8751309124Sdim return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A, 8752309124Sdim (__v2df) __B, 8753309124Sdim (__v2df) __C, 8754309124Sdim (__mmask8) __U, 8755309124Sdim _MM_FROUND_CUR_DIRECTION); 8756309124Sdim} 8757309124Sdim 8758309124Sdim#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\ 8759309124Sdim (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \ 8760309124Sdim (__v2df)(__m128d)(B), \ 8761309124Sdim (__v2df)(__m128d)(C), (__mmask8)(U), \ 8762309124Sdim (int)(R)); }) 8763309124Sdim 8764309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8765309124Sdim_mm_mask3_fnmadd_sd 
(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8766309124Sdim{ 8767309124Sdim return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W, 8768309124Sdim (__v2df) __X, 8769309124Sdim (__v2df) __Y, 8770309124Sdim (__mmask8) __U, 8771309124Sdim _MM_FROUND_CUR_DIRECTION); 8772309124Sdim} 8773309124Sdim 8774309124Sdim#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\ 8775309124Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \ 8776309124Sdim (__v2df)(__m128d)(X), \ 8777309124Sdim (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8778309124Sdim (int)(R)); }) 8779309124Sdim 8780309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8781309124Sdim_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8782309124Sdim{ 8783314564Sdim return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W, 8784314564Sdim -(__v2df) __A, 8785309124Sdim -(__v2df) __B, 8786309124Sdim (__mmask8) __U, 8787309124Sdim _MM_FROUND_CUR_DIRECTION); 8788309124Sdim} 8789309124Sdim 8790309124Sdim#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\ 8791314564Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8792314564Sdim -(__v2df)(__m128d)(A), \ 8793314564Sdim -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8794309124Sdim (int)(R)); }) 8795309124Sdim 8796309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8797309124Sdim_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8798309124Sdim{ 8799309124Sdim return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A, 8800309124Sdim (__v2df) __B, 8801309124Sdim -(__v2df) __C, 8802309124Sdim (__mmask8) __U, 8803309124Sdim _MM_FROUND_CUR_DIRECTION); 8804309124Sdim} 8805309124Sdim 8806309124Sdim#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\ 8807309124Sdim (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \ 8808309124Sdim (__v2df)(__m128d)(B), \ 8809309124Sdim -(__v2df)(__m128d)(C), \ 8810309124Sdim (__mmask8)(U), \ 
8811309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8812309124Sdim 8813309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8814309124Sdim_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8815309124Sdim{ 8816314564Sdim return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W), 8817309124Sdim (__v2df) __X, 8818314564Sdim (__v2df) (__Y), 8819309124Sdim (__mmask8) __U, 8820309124Sdim _MM_FROUND_CUR_DIRECTION); 8821309124Sdim} 8822309124Sdim 8823309124Sdim#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\ 8824314564Sdim (__m128d)__builtin_ia32_vfnmsubsd3_mask3((__v2df)(__m128d)(W), \ 8825309124Sdim (__v2df)(__m128d)(X), \ 8826314564Sdim (__v2df)(__m128d)(Y), \ 8827309124Sdim (__mmask8)(U), (int)(R)); }) 8828309124Sdim 8829309124Sdim#define _mm512_permutex_pd(X, C) __extension__ ({ \ 8830309124Sdim (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ 8831309124Sdim (__v8df)_mm512_undefined_pd(), \ 8832309124Sdim 0 + (((C) >> 0) & 0x3), \ 8833309124Sdim 0 + (((C) >> 2) & 0x3), \ 8834309124Sdim 0 + (((C) >> 4) & 0x3), \ 8835309124Sdim 0 + (((C) >> 6) & 0x3), \ 8836309124Sdim 4 + (((C) >> 0) & 0x3), \ 8837309124Sdim 4 + (((C) >> 2) & 0x3), \ 8838309124Sdim 4 + (((C) >> 4) & 0x3), \ 8839309124Sdim 4 + (((C) >> 6) & 0x3)); }) 8840309124Sdim 8841309124Sdim#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \ 8842309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 8843309124Sdim (__v8df)_mm512_permutex_pd((X), (C)), \ 8844309124Sdim (__v8df)(__m512d)(W)); }) 8845309124Sdim 8846309124Sdim#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \ 8847309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 8848309124Sdim (__v8df)_mm512_permutex_pd((X), (C)), \ 8849309124Sdim (__v8df)_mm512_setzero_pd()); }) 8850309124Sdim 8851309124Sdim#define _mm512_permutex_epi64(X, C) __extension__ ({ \ 8852309124Sdim (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \ 8853309124Sdim 
(__v8di)_mm512_undefined_epi32(), \ 8854309124Sdim 0 + (((C) >> 0) & 0x3), \ 8855309124Sdim 0 + (((C) >> 2) & 0x3), \ 8856309124Sdim 0 + (((C) >> 4) & 0x3), \ 8857309124Sdim 0 + (((C) >> 6) & 0x3), \ 8858309124Sdim 4 + (((C) >> 0) & 0x3), \ 8859309124Sdim 4 + (((C) >> 2) & 0x3), \ 8860309124Sdim 4 + (((C) >> 4) & 0x3), \ 8861309124Sdim 4 + (((C) >> 6) & 0x3)); }) 8862309124Sdim 8863309124Sdim#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ 8864309124Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 8865309124Sdim (__v8di)_mm512_permutex_epi64((X), (C)), \ 8866309124Sdim (__v8di)(__m512i)(W)); }) 8867309124Sdim 8868309124Sdim#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \ 8869309124Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 8870309124Sdim (__v8di)_mm512_permutex_epi64((X), (C)), \ 8871309124Sdim (__v8di)_mm512_setzero_si512()); }) 8872309124Sdim 8873309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 8874309124Sdim_mm512_permutexvar_pd (__m512i __X, __m512d __Y) 8875309124Sdim{ 8876309124Sdim return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, 8877309124Sdim (__v8di) __X, 8878309124Sdim (__v8df) _mm512_undefined_pd (), 8879309124Sdim (__mmask8) -1); 8880309124Sdim} 8881309124Sdim 8882309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 8883309124Sdim_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) 8884309124Sdim{ 8885309124Sdim return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, 8886309124Sdim (__v8di) __X, 8887309124Sdim (__v8df) __W, 8888309124Sdim (__mmask8) __U); 8889309124Sdim} 8890309124Sdim 8891309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 8892309124Sdim_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y) 8893309124Sdim{ 8894309124Sdim return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, 8895309124Sdim (__v8di) __X, 8896309124Sdim (__v8df) _mm512_setzero_pd (), 8897309124Sdim (__mmask8) __U); 
8898309124Sdim} 8899309124Sdim 8900309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 8901309124Sdim_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) 8902309124Sdim{ 8903309124Sdim return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, 8904309124Sdim (__v8di) __X, 8905309124Sdim (__v8di) _mm512_setzero_si512 (), 8906309124Sdim __M); 8907309124Sdim} 8908309124Sdim 8909309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 8910309124Sdim_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) 8911309124Sdim{ 8912309124Sdim return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, 8913309124Sdim (__v8di) __X, 8914309124Sdim (__v8di) _mm512_undefined_epi32 (), 8915309124Sdim (__mmask8) -1); 8916309124Sdim} 8917309124Sdim 8918309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 8919309124Sdim_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, 8920309124Sdim __m512i __Y) 8921309124Sdim{ 8922309124Sdim return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, 8923309124Sdim (__v8di) __X, 8924309124Sdim (__v8di) __W, 8925309124Sdim __M); 8926309124Sdim} 8927309124Sdim 8928309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 8929309124Sdim_mm512_permutexvar_ps (__m512i __X, __m512 __Y) 8930309124Sdim{ 8931309124Sdim return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, 8932309124Sdim (__v16si) __X, 8933309124Sdim (__v16sf) _mm512_undefined_ps (), 8934309124Sdim (__mmask16) -1); 8935309124Sdim} 8936309124Sdim 8937309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 8938309124Sdim_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) 8939309124Sdim{ 8940309124Sdim return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, 8941309124Sdim (__v16si) __X, 8942309124Sdim (__v16sf) __W, 8943309124Sdim (__mmask16) __U); 8944309124Sdim} 8945309124Sdim 8946309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 8947309124Sdim_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i 
__X, __m512 __Y) 8948309124Sdim{ 8949309124Sdim return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, 8950309124Sdim (__v16si) __X, 8951309124Sdim (__v16sf) _mm512_setzero_ps (), 8952309124Sdim (__mmask16) __U); 8953309124Sdim} 8954309124Sdim 8955309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 8956309124Sdim_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) 8957309124Sdim{ 8958309124Sdim return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, 8959309124Sdim (__v16si) __X, 8960309124Sdim (__v16si) _mm512_setzero_si512 (), 8961309124Sdim __M); 8962309124Sdim} 8963309124Sdim 8964309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 8965309124Sdim_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) 8966309124Sdim{ 8967309124Sdim return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, 8968309124Sdim (__v16si) __X, 8969309124Sdim (__v16si) _mm512_undefined_epi32 (), 8970309124Sdim (__mmask16) -1); 8971309124Sdim} 8972309124Sdim 8973321369Sdim#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32 8974321369Sdim 8975309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 8976309124Sdim_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, 8977309124Sdim __m512i __Y) 8978309124Sdim{ 8979309124Sdim return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, 8980309124Sdim (__v16si) __X, 8981309124Sdim (__v16si) __W, 8982309124Sdim __M); 8983309124Sdim} 8984309124Sdim 8985321369Sdim#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32 8986321369Sdim 8987309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 8988309124Sdim_mm512_kand (__mmask16 __A, __mmask16 __B) 8989309124Sdim{ 8990309124Sdim return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); 8991309124Sdim} 8992309124Sdim 8993309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 8994309124Sdim_mm512_kandn (__mmask16 __A, __mmask16 __B) 8995309124Sdim{ 8996309124Sdim return (__mmask16) 
__builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); 8997309124Sdim} 8998309124Sdim 8999309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 9000309124Sdim_mm512_kor (__mmask16 __A, __mmask16 __B) 9001309124Sdim{ 9002309124Sdim return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B); 9003309124Sdim} 9004309124Sdim 9005309124Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 9006309124Sdim_mm512_kortestc (__mmask16 __A, __mmask16 __B) 9007309124Sdim{ 9008309124Sdim return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B); 9009309124Sdim} 9010309124Sdim 9011309124Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 9012309124Sdim_mm512_kortestz (__mmask16 __A, __mmask16 __B) 9013309124Sdim{ 9014309124Sdim return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B); 9015309124Sdim} 9016309124Sdim 9017309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 9018309124Sdim_mm512_kunpackb (__mmask16 __A, __mmask16 __B) 9019309124Sdim{ 9020309124Sdim return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); 9021309124Sdim} 9022309124Sdim 9023309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 9024309124Sdim_mm512_kxnor (__mmask16 __A, __mmask16 __B) 9025309124Sdim{ 9026309124Sdim return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); 9027309124Sdim} 9028309124Sdim 9029309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 9030309124Sdim_mm512_kxor (__mmask16 __A, __mmask16 __B) 9031309124Sdim{ 9032309124Sdim return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); 9033309124Sdim} 9034309124Sdim 9035309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 9036309124Sdim_mm512_stream_si512 (__m512i * __P, __m512i __A) 9037309124Sdim{ 9038309124Sdim __builtin_nontemporal_store((__v8di)__A, (__v8di*)__P); 9039309124Sdim} 9040309124Sdim 9041309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9042309124Sdim_mm512_stream_load_si512 (void *__P) 9043309124Sdim{ 9044321369Sdim 
return (__m512i) __builtin_nontemporal_load((const __v8di *)__P); 9045309124Sdim} 9046309124Sdim 9047309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 9048309124Sdim_mm512_stream_pd (double *__P, __m512d __A) 9049309124Sdim{ 9050309124Sdim __builtin_nontemporal_store((__v8df)__A, (__v8df*)__P); 9051309124Sdim} 9052309124Sdim 9053309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 9054309124Sdim_mm512_stream_ps (float *__P, __m512 __A) 9055309124Sdim{ 9056309124Sdim __builtin_nontemporal_store((__v16sf)__A, (__v16sf*)__P); 9057309124Sdim} 9058309124Sdim 9059309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9060309124Sdim_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) 9061309124Sdim{ 9062309124Sdim return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 9063309124Sdim (__v8df) __W, 9064309124Sdim (__mmask8) __U); 9065309124Sdim} 9066309124Sdim 9067309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9068309124Sdim_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) 9069309124Sdim{ 9070309124Sdim return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 9071309124Sdim (__v8df) 9072309124Sdim _mm512_setzero_pd (), 9073309124Sdim (__mmask8) __U); 9074309124Sdim} 9075309124Sdim 9076309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9077309124Sdim_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 9078309124Sdim{ 9079309124Sdim return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 9080309124Sdim (__v8di) __W, 9081309124Sdim (__mmask8) __U); 9082309124Sdim} 9083309124Sdim 9084309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9085309124Sdim_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) 9086309124Sdim{ 9087309124Sdim return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 9088309124Sdim (__v8di) 9089309124Sdim _mm512_setzero_si512 (), 9090309124Sdim (__mmask8) __U); 9091309124Sdim} 9092309124Sdim 9093309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 
9094309124Sdim_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) 9095309124Sdim{ 9096309124Sdim return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 9097309124Sdim (__v16sf) __W, 9098309124Sdim (__mmask16) __U); 9099309124Sdim} 9100309124Sdim 9101309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9102309124Sdim_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) 9103309124Sdim{ 9104309124Sdim return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 9105309124Sdim (__v16sf) 9106309124Sdim _mm512_setzero_ps (), 9107309124Sdim (__mmask16) __U); 9108309124Sdim} 9109309124Sdim 9110309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9111309124Sdim_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 9112309124Sdim{ 9113309124Sdim return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 9114309124Sdim (__v16si) __W, 9115309124Sdim (__mmask16) __U); 9116309124Sdim} 9117309124Sdim 9118309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9119309124Sdim_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) 9120309124Sdim{ 9121309124Sdim return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 9122309124Sdim (__v16si) 9123309124Sdim _mm512_setzero_si512 (), 9124309124Sdim (__mmask16) __U); 9125309124Sdim} 9126309124Sdim 9127309124Sdim#define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \ 9128309124Sdim (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 9129309124Sdim (__v4sf)(__m128)(Y), (int)(P), \ 9130309124Sdim (__mmask8)-1, (int)(R)); }) 9131309124Sdim 9132309124Sdim#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \ 9133309124Sdim (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 9134309124Sdim (__v4sf)(__m128)(Y), (int)(P), \ 9135309124Sdim (__mmask8)(M), (int)(R)); }) 9136309124Sdim 9137309124Sdim#define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \ 9138309124Sdim (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 9139309124Sdim 
(__v4sf)(__m128)(Y), (int)(P), \ 9140309124Sdim (__mmask8)-1, \ 9141309124Sdim _MM_FROUND_CUR_DIRECTION); }) 9142309124Sdim 9143309124Sdim#define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \ 9144309124Sdim (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 9145309124Sdim (__v4sf)(__m128)(Y), (int)(P), \ 9146309124Sdim (__mmask8)(M), \ 9147309124Sdim _MM_FROUND_CUR_DIRECTION); }) 9148309124Sdim 9149309124Sdim#define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \ 9150309124Sdim (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 9151309124Sdim (__v2df)(__m128d)(Y), (int)(P), \ 9152309124Sdim (__mmask8)-1, (int)(R)); }) 9153309124Sdim 9154309124Sdim#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \ 9155309124Sdim (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 9156309124Sdim (__v2df)(__m128d)(Y), (int)(P), \ 9157309124Sdim (__mmask8)(M), (int)(R)); }) 9158309124Sdim 9159309124Sdim#define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \ 9160309124Sdim (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 9161309124Sdim (__v2df)(__m128d)(Y), (int)(P), \ 9162309124Sdim (__mmask8)-1, \ 9163309124Sdim _MM_FROUND_CUR_DIRECTION); }) 9164309124Sdim 9165309124Sdim#define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \ 9166309124Sdim (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 9167309124Sdim (__v2df)(__m128d)(Y), (int)(P), \ 9168309124Sdim (__mmask8)(M), \ 9169309124Sdim _MM_FROUND_CUR_DIRECTION); }) 9170309124Sdim 9171309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9172309124Sdim_mm512_movehdup_ps (__m512 __A) 9173309124Sdim{ 9174309124Sdim return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 9175309124Sdim 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); 9176309124Sdim} 9177309124Sdim 9178309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9179309124Sdim_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) 9180309124Sdim{ 9181309124Sdim return 
(__m512)__builtin_ia32_selectps_512((__mmask16)__U, 9182309124Sdim (__v16sf)_mm512_movehdup_ps(__A), 9183309124Sdim (__v16sf)__W); 9184309124Sdim} 9185309124Sdim 9186309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9187309124Sdim_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) 9188309124Sdim{ 9189309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 9190309124Sdim (__v16sf)_mm512_movehdup_ps(__A), 9191309124Sdim (__v16sf)_mm512_setzero_ps()); 9192309124Sdim} 9193309124Sdim 9194309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9195309124Sdim_mm512_moveldup_ps (__m512 __A) 9196309124Sdim{ 9197309124Sdim return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 9198309124Sdim 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); 9199309124Sdim} 9200309124Sdim 9201309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9202309124Sdim_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) 9203309124Sdim{ 9204309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 9205309124Sdim (__v16sf)_mm512_moveldup_ps(__A), 9206309124Sdim (__v16sf)__W); 9207309124Sdim} 9208309124Sdim 9209309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9210309124Sdim_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) 9211309124Sdim{ 9212309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 9213309124Sdim (__v16sf)_mm512_moveldup_ps(__A), 9214309124Sdim (__v16sf)_mm512_setzero_ps()); 9215309124Sdim} 9216309124Sdim 9217314564Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 9218314564Sdim_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 9219314564Sdim{ 9220314564Sdim __m128 res = __A; 9221314564Sdim res[0] = (__U & 1) ? __B[0] : __W[0]; 9222314564Sdim return res; 9223314564Sdim} 9224314564Sdim 9225314564Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 9226314564Sdim_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) 9227314564Sdim{ 9228314564Sdim __m128 res = __A; 9229314564Sdim res[0] = (__U & 1) ? 
__B[0] : 0; 9230314564Sdim return res; 9231314564Sdim} 9232314564Sdim 9233314564Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 9234314564Sdim_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 9235314564Sdim{ 9236314564Sdim __m128d res = __A; 9237314564Sdim res[0] = (__U & 1) ? __B[0] : __W[0]; 9238314564Sdim return res; 9239314564Sdim} 9240314564Sdim 9241314564Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 9242314564Sdim_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) 9243314564Sdim{ 9244314564Sdim __m128d res = __A; 9245314564Sdim res[0] = (__U & 1) ? __B[0] : 0; 9246314564Sdim return res; 9247314564Sdim} 9248314564Sdim 9249314564Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 9250314564Sdim_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) 9251314564Sdim{ 9252314564Sdim __builtin_ia32_storess128_mask ((__v16sf *)__W, 9253314564Sdim (__v16sf) _mm512_castps128_ps512(__A), 9254314564Sdim (__mmask16) __U & (__mmask16)1); 9255314564Sdim} 9256314564Sdim 9257314564Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 9258314564Sdim_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A) 9259314564Sdim{ 9260314564Sdim __builtin_ia32_storesd128_mask ((__v8df *)__W, 9261314564Sdim (__v8df) _mm512_castpd128_pd512(__A), 9262314564Sdim (__mmask8) __U & 1); 9263314564Sdim} 9264314564Sdim 9265314564Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 9266314564Sdim_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A) 9267314564Sdim{ 9268314564Sdim __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W, 9269314564Sdim (__v4sf) {0.0, 0.0, 0.0, 0.0}, 9270314564Sdim 0, 4, 4, 4); 9271314564Sdim 9272314564Sdim return (__m128) __builtin_shufflevector( 9273314564Sdim __builtin_ia32_loadss128_mask ((__v16sf *) __A, 9274314564Sdim (__v16sf) _mm512_castps128_ps512(src), 9275314564Sdim (__mmask16) __U & 1), 9276314564Sdim _mm512_undefined_ps(), 0, 1, 2, 3); 9277314564Sdim} 9278314564Sdim 9279314564Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 
9280314564Sdim_mm_maskz_load_ss (__mmask8 __U, const float* __A) 9281314564Sdim{ 9282314564Sdim return (__m128) __builtin_shufflevector( 9283314564Sdim __builtin_ia32_loadss128_mask ((__v16sf *) __A, 9284314564Sdim (__v16sf) _mm512_setzero_ps(), 9285314564Sdim (__mmask16) __U & 1), 9286314564Sdim _mm512_undefined_ps(), 0, 1, 2, 3); 9287314564Sdim} 9288314564Sdim 9289314564Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 9290314564Sdim_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A) 9291314564Sdim{ 9292314564Sdim __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W, 9293314564Sdim (__v2df) {0.0, 0.0}, 0, 2); 9294314564Sdim 9295314564Sdim return (__m128d) __builtin_shufflevector( 9296314564Sdim __builtin_ia32_loadsd128_mask ((__v8df *) __A, 9297314564Sdim (__v8df) _mm512_castpd128_pd512(src), 9298314564Sdim (__mmask8) __U & 1), 9299314564Sdim _mm512_undefined_pd(), 0, 1); 9300314564Sdim} 9301314564Sdim 9302314564Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 9303314564Sdim_mm_maskz_load_sd (__mmask8 __U, const double* __A) 9304314564Sdim{ 9305314564Sdim return (__m128d) __builtin_shufflevector( 9306314564Sdim __builtin_ia32_loadsd128_mask ((__v8df *) __A, 9307314564Sdim (__v8df) _mm512_setzero_pd(), 9308314564Sdim (__mmask8) __U & 1), 9309314564Sdim _mm512_undefined_pd(), 0, 1); 9310314564Sdim} 9311314564Sdim 9312309124Sdim#define _mm512_shuffle_epi32(A, I) __extension__ ({ \ 9313309124Sdim (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ 9314309124Sdim (__v16si)_mm512_undefined_epi32(), \ 9315309124Sdim 0 + (((I) >> 0) & 0x3), \ 9316309124Sdim 0 + (((I) >> 2) & 0x3), \ 9317309124Sdim 0 + (((I) >> 4) & 0x3), \ 9318309124Sdim 0 + (((I) >> 6) & 0x3), \ 9319309124Sdim 4 + (((I) >> 0) & 0x3), \ 9320309124Sdim 4 + (((I) >> 2) & 0x3), \ 9321309124Sdim 4 + (((I) >> 4) & 0x3), \ 9322309124Sdim 4 + (((I) >> 6) & 0x3), \ 9323309124Sdim 8 + (((I) >> 0) & 0x3), \ 9324309124Sdim 8 + (((I) >> 2) & 0x3), \ 9325309124Sdim 8 + (((I) >> 4) & 0x3), \ 
9326309124Sdim 8 + (((I) >> 6) & 0x3), \ 9327309124Sdim 12 + (((I) >> 0) & 0x3), \ 9328309124Sdim 12 + (((I) >> 2) & 0x3), \ 9329309124Sdim 12 + (((I) >> 4) & 0x3), \ 9330309124Sdim 12 + (((I) >> 6) & 0x3)); }) 9331309124Sdim 9332309124Sdim#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \ 9333309124Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 9334309124Sdim (__v16si)_mm512_shuffle_epi32((A), (I)), \ 9335309124Sdim (__v16si)(__m512i)(W)); }) 9336309124Sdim 9337309124Sdim#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \ 9338309124Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 9339309124Sdim (__v16si)_mm512_shuffle_epi32((A), (I)), \ 9340309124Sdim (__v16si)_mm512_setzero_si512()); }) 9341309124Sdim 9342309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9343309124Sdim_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) 9344309124Sdim{ 9345309124Sdim return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, 9346309124Sdim (__v8df) __W, 9347309124Sdim (__mmask8) __U); 9348309124Sdim} 9349309124Sdim 9350309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9351309124Sdim_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) 9352309124Sdim{ 9353309124Sdim return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, 9354309124Sdim (__v8df) _mm512_setzero_pd (), 9355309124Sdim (__mmask8) __U); 9356309124Sdim} 9357309124Sdim 9358309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9359309124Sdim_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 9360309124Sdim{ 9361309124Sdim return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, 9362309124Sdim (__v8di) __W, 9363309124Sdim (__mmask8) __U); 9364309124Sdim} 9365309124Sdim 9366309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9367309124Sdim_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A) 9368309124Sdim{ 9369309124Sdim return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, 9370309124Sdim (__v8di) 
_mm512_setzero_pd (), 9371309124Sdim (__mmask8) __U); 9372309124Sdim} 9373309124Sdim 9374309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9375309124Sdim_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P) 9376309124Sdim{ 9377309124Sdim return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, 9378309124Sdim (__v8df) __W, 9379309124Sdim (__mmask8) __U); 9380309124Sdim} 9381309124Sdim 9382309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9383309124Sdim_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P) 9384309124Sdim{ 9385309124Sdim return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, 9386309124Sdim (__v8df) _mm512_setzero_pd(), 9387309124Sdim (__mmask8) __U); 9388309124Sdim} 9389309124Sdim 9390309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9391309124Sdim_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P) 9392309124Sdim{ 9393309124Sdim return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, 9394309124Sdim (__v8di) __W, 9395309124Sdim (__mmask8) __U); 9396309124Sdim} 9397309124Sdim 9398309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9399309124Sdim_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) 9400309124Sdim{ 9401309124Sdim return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, 9402309124Sdim (__v8di) _mm512_setzero_pd(), 9403309124Sdim (__mmask8) __U); 9404309124Sdim} 9405309124Sdim 9406309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9407309124Sdim_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P) 9408309124Sdim{ 9409309124Sdim return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, 9410309124Sdim (__v16sf) __W, 9411309124Sdim (__mmask16) __U); 9412309124Sdim} 9413309124Sdim 9414309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9415309124Sdim_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P) 9416309124Sdim{ 9417309124Sdim return (__m512) 
__builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, 9418309124Sdim (__v16sf) _mm512_setzero_ps(), 9419309124Sdim (__mmask16) __U); 9420309124Sdim} 9421309124Sdim 9422309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9423309124Sdim_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P) 9424309124Sdim{ 9425309124Sdim return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, 9426309124Sdim (__v16si) __W, 9427309124Sdim (__mmask16) __U); 9428309124Sdim} 9429309124Sdim 9430309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9431309124Sdim_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) 9432309124Sdim{ 9433309124Sdim return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, 9434309124Sdim (__v16si) _mm512_setzero_ps(), 9435309124Sdim (__mmask16) __U); 9436309124Sdim} 9437309124Sdim 9438309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9439309124Sdim_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) 9440309124Sdim{ 9441309124Sdim return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, 9442309124Sdim (__v16sf) __W, 9443309124Sdim (__mmask16) __U); 9444309124Sdim} 9445309124Sdim 9446309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9447309124Sdim_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) 9448309124Sdim{ 9449309124Sdim return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, 9450309124Sdim (__v16sf) _mm512_setzero_ps(), 9451309124Sdim (__mmask16) __U); 9452309124Sdim} 9453309124Sdim 9454309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9455309124Sdim_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 9456309124Sdim{ 9457309124Sdim return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, 9458309124Sdim (__v16si) __W, 9459309124Sdim (__mmask16) __U); 9460309124Sdim} 9461309124Sdim 9462309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9463309124Sdim_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) 9464309124Sdim{ 
9465309124Sdim return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, 9466309124Sdim (__v16si) _mm512_setzero_ps(), 9467309124Sdim (__mmask16) __U); 9468309124Sdim} 9469309124Sdim 9470309124Sdim#define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \ 9471309124Sdim (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 9472309124Sdim (__v8df)_mm512_undefined_pd(), \ 9473309124Sdim (__mmask8)-1, (int)(R)); }) 9474309124Sdim 9475309124Sdim#define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \ 9476309124Sdim (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 9477309124Sdim (__v8df)(__m512d)(W), \ 9478309124Sdim (__mmask8)(U), (int)(R)); }) 9479309124Sdim 9480309124Sdim#define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \ 9481309124Sdim (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 9482309124Sdim (__v8df)_mm512_setzero_pd(), \ 9483309124Sdim (__mmask8)(U), (int)(R)); }) 9484309124Sdim 9485309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9486309124Sdim_mm512_cvtps_pd (__m256 __A) 9487309124Sdim{ 9488309124Sdim return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 9489309124Sdim (__v8df) 9490309124Sdim _mm512_undefined_pd (), 9491309124Sdim (__mmask8) -1, 9492309124Sdim _MM_FROUND_CUR_DIRECTION); 9493309124Sdim} 9494309124Sdim 9495309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9496309124Sdim_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) 9497309124Sdim{ 9498309124Sdim return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 9499309124Sdim (__v8df) __W, 9500309124Sdim (__mmask8) __U, 9501309124Sdim _MM_FROUND_CUR_DIRECTION); 9502309124Sdim} 9503309124Sdim 9504309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9505309124Sdim_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) 9506309124Sdim{ 9507309124Sdim return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 9508309124Sdim (__v8df) 9509309124Sdim _mm512_setzero_pd (), 9510309124Sdim (__mmask8) __U, 
9511309124Sdim _MM_FROUND_CUR_DIRECTION); 9512309124Sdim} 9513309124Sdim 9514314564Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9515314564Sdim_mm512_cvtpslo_pd (__m512 __A) 9516314564Sdim{ 9517314564Sdim return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A)); 9518314564Sdim} 9519314564Sdim 9520314564Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9521314564Sdim_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A) 9522314564Sdim{ 9523314564Sdim return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A)); 9524314564Sdim} 9525314564Sdim 9526309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9527309124Sdim_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) 9528309124Sdim{ 9529309124Sdim return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, 9530309124Sdim (__v8df) __A, 9531309124Sdim (__v8df) __W); 9532309124Sdim} 9533309124Sdim 9534309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9535309124Sdim_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) 9536309124Sdim{ 9537309124Sdim return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, 9538309124Sdim (__v8df) __A, 9539309124Sdim (__v8df) _mm512_setzero_pd ()); 9540309124Sdim} 9541309124Sdim 9542309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9543309124Sdim_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) 9544309124Sdim{ 9545309124Sdim return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, 9546309124Sdim (__v16sf) __A, 9547309124Sdim (__v16sf) __W); 9548309124Sdim} 9549309124Sdim 9550309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9551309124Sdim_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) 9552309124Sdim{ 9553309124Sdim return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, 9554309124Sdim (__v16sf) __A, 9555309124Sdim (__v16sf) _mm512_setzero_ps ()); 9556309124Sdim} 9557309124Sdim 9558309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 9559309124Sdim_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A) 
9560309124Sdim{ 9561309124Sdim __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A, 9562309124Sdim (__mmask8) __U); 9563309124Sdim} 9564309124Sdim 9565309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 9566309124Sdim_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A) 9567309124Sdim{ 9568309124Sdim __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A, 9569309124Sdim (__mmask8) __U); 9570309124Sdim} 9571309124Sdim 9572309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 9573309124Sdim_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A) 9574309124Sdim{ 9575309124Sdim __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A, 9576309124Sdim (__mmask16) __U); 9577309124Sdim} 9578309124Sdim 9579309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 9580309124Sdim_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) 9581309124Sdim{ 9582309124Sdim __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A, 9583309124Sdim (__mmask16) __U); 9584309124Sdim} 9585309124Sdim 9586309124Sdim#define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \ 9587309124Sdim (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9588309124Sdim (__v2df)(__m128d)(B), \ 9589309124Sdim (__v4sf)_mm_undefined_ps(), \ 9590309124Sdim (__mmask8)-1, (int)(R)); }) 9591309124Sdim 9592309124Sdim#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \ 9593309124Sdim (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9594309124Sdim (__v2df)(__m128d)(B), \ 9595309124Sdim (__v4sf)(__m128)(W), \ 9596309124Sdim (__mmask8)(U), (int)(R)); }) 9597309124Sdim 9598309124Sdim#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \ 9599309124Sdim (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9600309124Sdim (__v2df)(__m128d)(B), \ 9601309124Sdim (__v4sf)_mm_setzero_ps(), \ 9602309124Sdim (__mmask8)(U), (int)(R)); }) 9603309124Sdim 9604309124Sdimstatic 
__inline__ __m128 __DEFAULT_FN_ATTRS 9605309124Sdim_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) 9606309124Sdim{ 9607309124Sdim return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A), 9608309124Sdim (__v2df)(__B), 9609309124Sdim (__v4sf)(__W), 9610309124Sdim (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); 9611309124Sdim} 9612309124Sdim 9613309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 9614309124Sdim_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B) 9615309124Sdim{ 9616309124Sdim return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A), 9617309124Sdim (__v2df)(__B), 9618309124Sdim (__v4sf)_mm_setzero_ps(), 9619309124Sdim (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); 9620309124Sdim} 9621309124Sdim 9622309124Sdim#define _mm_cvtss_i32 _mm_cvtss_si32 9623314564Sdim#define _mm_cvtsd_i32 _mm_cvtsd_si32 9624314564Sdim#define _mm_cvti32_sd _mm_cvtsi32_sd 9625314564Sdim#define _mm_cvti32_ss _mm_cvtsi32_ss 9626314564Sdim#ifdef __x86_64__ 9627309124Sdim#define _mm_cvtss_i64 _mm_cvtss_si64 9628309124Sdim#define _mm_cvtsd_i64 _mm_cvtsd_si64 9629309124Sdim#define _mm_cvti64_sd _mm_cvtsi64_sd 9630309124Sdim#define _mm_cvti64_ss _mm_cvtsi64_ss 9631314564Sdim#endif 9632309124Sdim 9633314564Sdim#ifdef __x86_64__ 9634309124Sdim#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \ 9635309124Sdim (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 9636309124Sdim (int)(R)); }) 9637309124Sdim 9638309124Sdim#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \ 9639309124Sdim (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 9640309124Sdim (int)(R)); }) 9641314564Sdim#endif 9642309124Sdim 9643309124Sdim#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \ 9644309124Sdim (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); }) 9645309124Sdim 9646309124Sdim#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \ 9647309124Sdim (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), 
(int)(B), (int)(R)); }) 9648309124Sdim 9649314564Sdim#ifdef __x86_64__ 9650309124Sdim#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \ 9651309124Sdim (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9652309124Sdim (int)(R)); }) 9653309124Sdim 9654309124Sdim#define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \ 9655309124Sdim (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9656309124Sdim (int)(R)); }) 9657314564Sdim#endif 9658309124Sdim 9659309124Sdim#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \ 9660309124Sdim (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9661309124Sdim (__v4sf)(__m128)(B), \ 9662309124Sdim (__v2df)_mm_undefined_pd(), \ 9663309124Sdim (__mmask8)-1, (int)(R)); }) 9664309124Sdim 9665309124Sdim#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \ 9666309124Sdim (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9667309124Sdim (__v4sf)(__m128)(B), \ 9668309124Sdim (__v2df)(__m128d)(W), \ 9669309124Sdim (__mmask8)(U), (int)(R)); }) 9670309124Sdim 9671309124Sdim#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \ 9672309124Sdim (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9673309124Sdim (__v4sf)(__m128)(B), \ 9674309124Sdim (__v2df)_mm_setzero_pd(), \ 9675309124Sdim (__mmask8)(U), (int)(R)); }) 9676309124Sdim 9677309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 9678309124Sdim_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B) 9679309124Sdim{ 9680309124Sdim return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A), 9681309124Sdim (__v4sf)(__B), 9682309124Sdim (__v2df)(__W), 9683309124Sdim (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); 9684309124Sdim} 9685309124Sdim 9686309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 9687309124Sdim_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B) 9688309124Sdim{ 9689309124Sdim return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A), 
9690309124Sdim (__v4sf)(__B), 9691309124Sdim (__v2df)_mm_setzero_pd(), 9692309124Sdim (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); 9693309124Sdim} 9694309124Sdim 9695309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 9696309124Sdim_mm_cvtu32_sd (__m128d __A, unsigned __B) 9697309124Sdim{ 9698309124Sdim return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B); 9699309124Sdim} 9700309124Sdim 9701314564Sdim#ifdef __x86_64__ 9702309124Sdim#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \ 9703309124Sdim (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \ 9704309124Sdim (unsigned long long)(B), (int)(R)); }) 9705309124Sdim 9706309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 9707309124Sdim_mm_cvtu64_sd (__m128d __A, unsigned long long __B) 9708309124Sdim{ 9709309124Sdim return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, 9710309124Sdim _MM_FROUND_CUR_DIRECTION); 9711309124Sdim} 9712314564Sdim#endif 9713309124Sdim 9714309124Sdim#define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \ 9715309124Sdim (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \ 9716309124Sdim (int)(R)); }) 9717309124Sdim 9718309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 9719309124Sdim_mm_cvtu32_ss (__m128 __A, unsigned __B) 9720309124Sdim{ 9721309124Sdim return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, 9722309124Sdim _MM_FROUND_CUR_DIRECTION); 9723309124Sdim} 9724309124Sdim 9725314564Sdim#ifdef __x86_64__ 9726309124Sdim#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \ 9727309124Sdim (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \ 9728309124Sdim (unsigned long long)(B), (int)(R)); }) 9729309124Sdim 9730309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 9731309124Sdim_mm_cvtu64_ss (__m128 __A, unsigned long long __B) 9732309124Sdim{ 9733309124Sdim return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, 9734309124Sdim _MM_FROUND_CUR_DIRECTION); 9735309124Sdim} 9736314564Sdim#endif 
9737309124Sdim 9738309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9739309124Sdim_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) 9740309124Sdim{ 9741309124Sdim return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O, 9742309124Sdim __M); 9743309124Sdim} 9744309124Sdim 9745314564Sdim#ifdef __x86_64__ 9746309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9747309124Sdim_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) 9748309124Sdim{ 9749309124Sdim return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O, 9750309124Sdim __M); 9751309124Sdim} 9752314564Sdim#endif 9753309124Sdim 9754321369Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 9755321369Sdim_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59, 9756321369Sdim char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, 9757321369Sdim char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, 9758321369Sdim char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, 9759321369Sdim char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, 9760321369Sdim char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, 9761321369Sdim char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, 9762321369Sdim char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, 9763321369Sdim char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, 9764321369Sdim char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, 9765321369Sdim char __e4, char __e3, char __e2, char __e1, char __e0) { 9766321369Sdim 9767321369Sdim return __extension__ (__m512i)(__v64qi) 9768321369Sdim {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7, 9769321369Sdim __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15, 9770321369Sdim __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23, 9771321369Sdim __e24, __e25, __e26, __e27, __e28, __e29, __e30, 
__e31, 9772321369Sdim __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39, 9773321369Sdim __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47, 9774321369Sdim __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55, 9775321369Sdim __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63}; 9776321369Sdim} 9777321369Sdim 9778321369Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 9779321369Sdim_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, 9780321369Sdim short __e27, short __e26, short __e25, short __e24, short __e23, 9781321369Sdim short __e22, short __e21, short __e20, short __e19, short __e18, 9782321369Sdim short __e17, short __e16, short __e15, short __e14, short __e13, 9783321369Sdim short __e12, short __e11, short __e10, short __e9, short __e8, 9784321369Sdim short __e7, short __e6, short __e5, short __e4, short __e3, 9785321369Sdim short __e2, short __e1, short __e0) { 9786321369Sdim return __extension__ (__m512i)(__v32hi) 9787321369Sdim {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7, 9788321369Sdim __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15, 9789321369Sdim __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23, 9790321369Sdim __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 }; 9791321369Sdim} 9792321369Sdim 9793309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 9794309124Sdim_mm512_set_epi32 (int __A, int __B, int __C, int __D, 9795309124Sdim int __E, int __F, int __G, int __H, 9796309124Sdim int __I, int __J, int __K, int __L, 9797309124Sdim int __M, int __N, int __O, int __P) 9798309124Sdim{ 9799309124Sdim return __extension__ (__m512i)(__v16si) 9800309124Sdim { __P, __O, __N, __M, __L, __K, __J, __I, 9801309124Sdim __H, __G, __F, __E, __D, __C, __B, __A }; 9802309124Sdim} 9803309124Sdim 9804309124Sdim#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \ 9805309124Sdim e8,e9,e10,e11,e12,e13,e14,e15) \ 9806309124Sdim _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \ 9807309124Sdim 
(e5),(e4),(e3),(e2),(e1),(e0)) 9808309124Sdim 9809309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9810309124Sdim_mm512_set_epi64 (long long __A, long long __B, long long __C, 9811309124Sdim long long __D, long long __E, long long __F, 9812309124Sdim long long __G, long long __H) 9813309124Sdim{ 9814309124Sdim return __extension__ (__m512i) (__v8di) 9815309124Sdim { __H, __G, __F, __E, __D, __C, __B, __A }; 9816309124Sdim} 9817309124Sdim 9818309124Sdim#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ 9819309124Sdim _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) 9820309124Sdim 9821309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9822309124Sdim_mm512_set_pd (double __A, double __B, double __C, double __D, 9823309124Sdim double __E, double __F, double __G, double __H) 9824309124Sdim{ 9825309124Sdim return __extension__ (__m512d) 9826309124Sdim { __H, __G, __F, __E, __D, __C, __B, __A }; 9827309124Sdim} 9828309124Sdim 9829309124Sdim#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ 9830309124Sdim _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) 9831309124Sdim 9832309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9833309124Sdim_mm512_set_ps (float __A, float __B, float __C, float __D, 9834309124Sdim float __E, float __F, float __G, float __H, 9835309124Sdim float __I, float __J, float __K, float __L, 9836309124Sdim float __M, float __N, float __O, float __P) 9837309124Sdim{ 9838309124Sdim return __extension__ (__m512) 9839309124Sdim { __P, __O, __N, __M, __L, __K, __J, __I, 9840309124Sdim __H, __G, __F, __E, __D, __C, __B, __A }; 9841309124Sdim} 9842309124Sdim 9843309124Sdim#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \ 9844309124Sdim _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \ 9845309124Sdim (e4),(e3),(e2),(e1),(e0)) 9846309124Sdim 9847309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9848314564Sdim_mm512_abs_ps(__m512 __A) 9849309124Sdim{ 9850314564Sdim return 
(__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; 9851309124Sdim} 9852309124Sdim 9853309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9854314564Sdim_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) 9855309124Sdim{ 9856314564Sdim return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; 9857309124Sdim} 9858309124Sdim 9859309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9860314564Sdim_mm512_abs_pd(__m512d __A) 9861309124Sdim{ 9862314564Sdim return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ; 9863309124Sdim} 9864309124Sdim 9865309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9866314564Sdim_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) 9867309124Sdim{ 9868314564Sdim return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A); 9869309124Sdim} 9870309124Sdim 9871314564Sdim// Vector-reduction arithmetic accepts vectors as inputs and produces scalars as 9872314564Sdim// outputs. This class of vector operation forms the basis of many scientific 9873314564Sdim// computations. In vector-reduction arithmetic, the evaluation off is 9874314564Sdim// independent of the order of the input elements of V. 9875314564Sdim 9876314564Sdim// Used bisection method. At each step, we partition the vector with previous 9877314564Sdim// step in half, and the operation is performed on its two halves. 9878314564Sdim// This takes log2(n) steps where n is the number of elements in the vector. 9879314564Sdim 9880314564Sdim// Vec512 - Vector with size 512. 9881314564Sdim// Operator - Can be one of following: +,*,&,| 9882314564Sdim// T2 - Can get 'i' for int and 'f' for float. 9883314564Sdim// T1 - Can get 'i' for int and 'd' for double. 
9884314564Sdim 9885314564Sdim#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1) \ 9886314564Sdim __extension__({ \ 9887314564Sdim __m256##T1 Vec256 = __builtin_shufflevector( \ 9888314564Sdim (__v8d##T2)Vec512, \ 9889314564Sdim (__v8d##T2)Vec512, \ 9890314564Sdim 0, 1, 2, 3) \ 9891314564Sdim Operator \ 9892314564Sdim __builtin_shufflevector( \ 9893314564Sdim (__v8d##T2)Vec512, \ 9894314564Sdim (__v8d##T2)Vec512, \ 9895314564Sdim 4, 5, 6, 7); \ 9896314564Sdim __m128##T1 Vec128 = __builtin_shufflevector( \ 9897314564Sdim (__v4d##T2)Vec256, \ 9898314564Sdim (__v4d##T2)Vec256, \ 9899314564Sdim 0, 1) \ 9900314564Sdim Operator \ 9901314564Sdim __builtin_shufflevector( \ 9902314564Sdim (__v4d##T2)Vec256, \ 9903314564Sdim (__v4d##T2)Vec256, \ 9904314564Sdim 2, 3); \ 9905314564Sdim Vec128 = __builtin_shufflevector((__v2d##T2)Vec128, \ 9906314564Sdim (__v2d##T2)Vec128, 0, -1) \ 9907314564Sdim Operator \ 9908314564Sdim __builtin_shufflevector((__v2d##T2)Vec128, \ 9909314564Sdim (__v2d##T2)Vec128, 1, -1); \ 9910314564Sdim return Vec128[0]; \ 9911314564Sdim }) 9912314564Sdim 9913314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) { 9914314564Sdim _mm512_reduce_operator_64bit(__W, +, i, i); 9915314564Sdim} 9916314564Sdim 9917314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) { 9918314564Sdim _mm512_reduce_operator_64bit(__W, *, i, i); 9919314564Sdim} 9920314564Sdim 9921314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) { 9922314564Sdim _mm512_reduce_operator_64bit(__W, &, i, i); 9923314564Sdim} 9924314564Sdim 9925314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) { 9926314564Sdim _mm512_reduce_operator_64bit(__W, |, i, i); 9927314564Sdim} 9928314564Sdim 9929314564Sdimstatic __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) { 9930314564Sdim _mm512_reduce_operator_64bit(__W, +, f, 
d); 9931314564Sdim} 9932314564Sdim 9933314564Sdimstatic __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) { 9934314564Sdim _mm512_reduce_operator_64bit(__W, *, f, d); 9935314564Sdim} 9936314564Sdim 9937314564Sdim// Vec512 - Vector with size 512. 9938314564Sdim// Vec512Neutral - All vector elements set to the identity element. 9939314564Sdim// Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0} 9940314564Sdim// Operator - Can be one of following: +,*,&,| 9941314564Sdim// Mask - Intrinsic Mask 9942314564Sdim// T2 - Can get 'i' for int and 'f' for float. 9943314564Sdim// T1 - Can get 'i' for int and 'd' for packed double-precision. 9944314564Sdim// T3 - Can be Pd for packed double or q for q-word. 9945314564Sdim 9946314564Sdim#define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator, \ 9947314564Sdim Mask, T2, T1, T3) \ 9948314564Sdim __extension__({ \ 9949314564Sdim Vec512 = __builtin_ia32_select##T3##_512( \ 9950314564Sdim (__mmask8)Mask, \ 9951314564Sdim (__v8d##T2)Vec512, \ 9952314564Sdim (__v8d##T2)Vec512Neutral); \ 9953314564Sdim _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1); \ 9954314564Sdim }) 9955314564Sdim 9956314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 9957314564Sdim_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) { 9958314564Sdim _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q); 9959314564Sdim} 9960314564Sdim 9961314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 9962314564Sdim_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) { 9963314564Sdim _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q); 9964314564Sdim} 9965314564Sdim 9966314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 9967314564Sdim_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) { 9968314564Sdim _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF), 9969314564Sdim &, __M, i, i, q); 9970314564Sdim} 9971314564Sdim 
9972314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 9973314564Sdim_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) { 9974314564Sdim _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M, 9975314564Sdim i, i, q); 9976314564Sdim} 9977314564Sdim 9978314564Sdimstatic __inline__ double __DEFAULT_FN_ATTRS 9979314564Sdim_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) { 9980314564Sdim _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M, 9981314564Sdim f, d, pd); 9982314564Sdim} 9983314564Sdim 9984314564Sdimstatic __inline__ double __DEFAULT_FN_ATTRS 9985314564Sdim_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) { 9986314564Sdim _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M, 9987314564Sdim f, d, pd); 9988314564Sdim} 9989314564Sdim 9990314564Sdim// Vec512 - Vector with size 512. 9991314564Sdim// Operator - Can be one of following: +,*,&,| 9992314564Sdim// T2 - Can get 'i' for int and ' ' for packed single. 9993314564Sdim// T1 - Can get 'i' for int and 'f' for float. 
9994314564Sdim 9995314564Sdim#define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \ 9996314564Sdim __m256##T1 Vec256 = \ 9997314564Sdim (__m256##T1)(__builtin_shufflevector( \ 9998314564Sdim (__v16s##T2)Vec512, \ 9999314564Sdim (__v16s##T2)Vec512, \ 10000314564Sdim 0, 1, 2, 3, 4, 5, 6, 7) \ 10001314564Sdim Operator \ 10002314564Sdim __builtin_shufflevector( \ 10003314564Sdim (__v16s##T2)Vec512, \ 10004314564Sdim (__v16s##T2)Vec512, \ 10005314564Sdim 8, 9, 10, 11, 12, 13, 14, 15)); \ 10006314564Sdim __m128##T1 Vec128 = \ 10007314564Sdim (__m128##T1)(__builtin_shufflevector( \ 10008314564Sdim (__v8s##T2)Vec256, \ 10009314564Sdim (__v8s##T2)Vec256, \ 10010314564Sdim 0, 1, 2, 3) \ 10011314564Sdim Operator \ 10012314564Sdim __builtin_shufflevector( \ 10013314564Sdim (__v8s##T2)Vec256, \ 10014314564Sdim (__v8s##T2)Vec256, \ 10015314564Sdim 4, 5, 6, 7)); \ 10016314564Sdim Vec128 = (__m128##T1)(__builtin_shufflevector( \ 10017314564Sdim (__v4s##T2)Vec128, \ 10018314564Sdim (__v4s##T2)Vec128, \ 10019314564Sdim 0, 1, -1, -1) \ 10020314564Sdim Operator \ 10021314564Sdim __builtin_shufflevector( \ 10022314564Sdim (__v4s##T2)Vec128, \ 10023314564Sdim (__v4s##T2)Vec128, \ 10024314564Sdim 2, 3, -1, -1)); \ 10025314564Sdim Vec128 = (__m128##T1)(__builtin_shufflevector( \ 10026314564Sdim (__v4s##T2)Vec128, \ 10027314564Sdim (__v4s##T2)Vec128, \ 10028314564Sdim 0, -1, -1, -1) \ 10029314564Sdim Operator \ 10030314564Sdim __builtin_shufflevector( \ 10031314564Sdim (__v4s##T2)Vec128, \ 10032314564Sdim (__v4s##T2)Vec128, \ 10033314564Sdim 1, -1, -1, -1)); \ 10034314564Sdim return Vec128[0]; \ 10035314564Sdim }) 10036314564Sdim 10037314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 10038314564Sdim_mm512_reduce_add_epi32(__m512i __W) { 10039314564Sdim _mm512_reduce_operator_32bit(__W, +, i, i); 10040314564Sdim} 10041314564Sdim 10042314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 10043314564Sdim_mm512_reduce_mul_epi32(__m512i __W) { 10044314564Sdim 
_mm512_reduce_operator_32bit(__W, *, i, i); 10045314564Sdim} 10046314564Sdim 10047314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 10048314564Sdim_mm512_reduce_and_epi32(__m512i __W) { 10049314564Sdim _mm512_reduce_operator_32bit(__W, &, i, i); 10050314564Sdim} 10051314564Sdim 10052314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 10053314564Sdim_mm512_reduce_or_epi32(__m512i __W) { 10054314564Sdim _mm512_reduce_operator_32bit(__W, |, i, i); 10055314564Sdim} 10056314564Sdim 10057314564Sdimstatic __inline__ float __DEFAULT_FN_ATTRS 10058314564Sdim_mm512_reduce_add_ps(__m512 __W) { 10059314564Sdim _mm512_reduce_operator_32bit(__W, +, f, ); 10060314564Sdim} 10061314564Sdim 10062314564Sdimstatic __inline__ float __DEFAULT_FN_ATTRS 10063314564Sdim_mm512_reduce_mul_ps(__m512 __W) { 10064314564Sdim _mm512_reduce_operator_32bit(__W, *, f, ); 10065314564Sdim} 10066314564Sdim 10067314564Sdim// Vec512 - Vector with size 512. 10068314564Sdim// Vec512Neutral - All vector elements set to the identity element. 10069314564Sdim// Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0} 10070314564Sdim// Operator - Can be one of following: +,*,&,| 10071314564Sdim// Mask - Intrinsic Mask 10072314564Sdim// T2 - Can get 'i' for int and 'f' for float. 10073314564Sdim// T1 - Can get 'i' for int and 'd' for double. 10074314564Sdim// T3 - Can be Ps for packed single or d for d-word. 
10075314564Sdim 10076314564Sdim#define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator, \ 10077314564Sdim Mask, T2, T1, T3) \ 10078314564Sdim __extension__({ \ 10079314564Sdim Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \ 10080314564Sdim (__mmask16)Mask, \ 10081314564Sdim (__v16s##T2)Vec512, \ 10082314564Sdim (__v16s##T2)Vec512Neutral); \ 10083314564Sdim _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1); \ 10084314564Sdim }) 10085314564Sdim 10086314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 10087314564Sdim_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) { 10088314564Sdim _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), +, __M, i, i, d); 10089314564Sdim} 10090314564Sdim 10091314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 10092314564Sdim_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) { 10093314564Sdim _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(1), *, __M, i, i, d); 10094314564Sdim} 10095314564Sdim 10096314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 10097314564Sdim_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) { 10098314564Sdim _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M, 10099314564Sdim i, i, d); 10100314564Sdim} 10101314564Sdim 10102314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 10103314564Sdim_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) { 10104314564Sdim _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), |, __M, i, i, d); 10105314564Sdim} 10106314564Sdim 10107314564Sdimstatic __inline__ float __DEFAULT_FN_ATTRS 10108314564Sdim_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) { 10109314564Sdim _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(0), +, __M, f, , ps); 10110314564Sdim} 10111314564Sdim 10112314564Sdimstatic __inline__ float __DEFAULT_FN_ATTRS 10113314564Sdim_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) { 10114314564Sdim _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, 
f, , ps); 10115314564Sdim} 10116314564Sdim 10117314564Sdim// Used bisection method. At each step, we partition the vector with previous 10118314564Sdim// step in half, and the operation is performed on its two halves. 10119314564Sdim// This takes log2(n) steps where n is the number of elements in the vector. 10120314564Sdim// This macro uses only intrinsics from the AVX512F feature. 10121314564Sdim 10122314564Sdim// Vec512 - Vector with size of 512. 10123314564Sdim// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example: 10124314564Sdim// __mm512_max_epi64 10125314564Sdim// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}] 10126314564Sdim// T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}] 10127314564Sdim 10128314564Sdim#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \ 10129314564Sdim Vec512 = _mm512_##IntrinName( \ 10130314564Sdim (__m512##T1)__builtin_shufflevector( \ 10131314564Sdim (__v8d##T2)Vec512, \ 10132314564Sdim (__v8d##T2)Vec512, \ 10133314564Sdim 0, 1, 2, 3, -1, -1, -1, -1), \ 10134314564Sdim (__m512##T1)__builtin_shufflevector( \ 10135314564Sdim (__v8d##T2)Vec512, \ 10136314564Sdim (__v8d##T2)Vec512, \ 10137314564Sdim 4, 5, 6, 7, -1, -1, -1, -1)); \ 10138314564Sdim Vec512 = _mm512_##IntrinName( \ 10139314564Sdim (__m512##T1)__builtin_shufflevector( \ 10140314564Sdim (__v8d##T2)Vec512, \ 10141314564Sdim (__v8d##T2)Vec512, \ 10142314564Sdim 0, 1, -1, -1, -1, -1, -1, -1),\ 10143314564Sdim (__m512##T1)__builtin_shufflevector( \ 10144314564Sdim (__v8d##T2)Vec512, \ 10145314564Sdim (__v8d##T2)Vec512, \ 10146314564Sdim 2, 3, -1, -1, -1, -1, -1, \ 10147314564Sdim -1)); \ 10148314564Sdim Vec512 = _mm512_##IntrinName( \ 10149314564Sdim (__m512##T1)__builtin_shufflevector( \ 10150314564Sdim (__v8d##T2)Vec512, \ 10151314564Sdim (__v8d##T2)Vec512, \ 10152314564Sdim 0, -1, -1, -1, -1, -1, -1, -1),\ 10153314564Sdim (__m512##T1)__builtin_shufflevector( \ 10154314564Sdim (__v8d##T2)Vec512, \ 
10155314564Sdim (__v8d##T2)Vec512, \ 10156314564Sdim 1, -1, -1, -1, -1, -1, -1, -1))\ 10157314564Sdim ; \ 10158314564Sdim return Vec512[0]; \ 10159314564Sdim }) 10160314564Sdim 10161314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 10162314564Sdim_mm512_reduce_max_epi64(__m512i __V) { 10163314564Sdim _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i); 10164314564Sdim} 10165314564Sdim 10166314564Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 10167314564Sdim_mm512_reduce_max_epu64(__m512i __V) { 10168314564Sdim _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i); 10169314564Sdim} 10170314564Sdim 10171314564Sdimstatic __inline__ double __DEFAULT_FN_ATTRS 10172314564Sdim_mm512_reduce_max_pd(__m512d __V) { 10173314564Sdim _mm512_reduce_maxMin_64bit(__V, max_pd, d, f); 10174314564Sdim} 10175314564Sdim 10176314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64 10177314564Sdim(__m512i __V) { 10178314564Sdim _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i); 10179314564Sdim} 10180314564Sdim 10181314564Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 10182314564Sdim_mm512_reduce_min_epu64(__m512i __V) { 10183314564Sdim _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i); 10184314564Sdim} 10185314564Sdim 10186314564Sdimstatic __inline__ double __DEFAULT_FN_ATTRS 10187314564Sdim_mm512_reduce_min_pd(__m512d __V) { 10188314564Sdim _mm512_reduce_maxMin_64bit(__V, min_pd, d, f); 10189314564Sdim} 10190314564Sdim 10191314564Sdim// Vec512 - Vector with size 512. 
10192314564Sdim// Vec512Neutral - A 512 length vector with elements set to the identity element 10193314564Sdim// Identity element: {max_epi,0x8000000000000000} 10194314564Sdim// {max_epu,0x0000000000000000} 10195314564Sdim// {max_pd, 0xFFF0000000000000} 10196314564Sdim// {min_epi,0x7FFFFFFFFFFFFFFF} 10197314564Sdim// {min_epu,0xFFFFFFFFFFFFFFFF} 10198314564Sdim// {min_pd, 0x7FF0000000000000} 10199314564Sdim// 10200314564Sdim// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example: 10201314564Sdim// __mm512_max_epi64 10202314564Sdim// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}] 10203314564Sdim// T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}] 10204314564Sdim// T3 - Can get 'q' q word and 'pd' for packed double. 10205314564Sdim// [__builtin_ia32_select{q|pd}_512] 10206314564Sdim// Mask - Intrinsic Mask 10207314564Sdim 10208314564Sdim#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \ 10209314564Sdim T2, T3, Mask) \ 10210314564Sdim __extension__({ \ 10211314564Sdim Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \ 10212314564Sdim (__mmask8)Mask, \ 10213314564Sdim (__v8d##T2)Vec512, \ 10214314564Sdim (__v8d##T2)Vec512Neutral); \ 10215314564Sdim _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2); \ 10216314564Sdim }) 10217314564Sdim 10218314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 10219314564Sdim_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) { 10220314564Sdim _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000), 10221314564Sdim max_epi64, i, i, q, __M); 10222314564Sdim} 10223314564Sdim 10224314564Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 10225314564Sdim_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) { 10226314564Sdim _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000), 10227314564Sdim max_epu64, i, i, q, __M); 10228314564Sdim} 10229314564Sdim 10230314564Sdimstatic __inline__ double 
__DEFAULT_FN_ATTRS 10231314564Sdim_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) { 10232314564Sdim _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()), 10233314564Sdim max_pd, d, f, pd, __M); 10234314564Sdim} 10235314564Sdim 10236314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 10237314564Sdim_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) { 10238314564Sdim _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF), 10239314564Sdim min_epi64, i, i, q, __M); 10240314564Sdim} 10241314564Sdim 10242314564Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 10243314564Sdim_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) { 10244314564Sdim _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF), 10245314564Sdim min_epu64, i, i, q, __M); 10246314564Sdim} 10247314564Sdim 10248314564Sdimstatic __inline__ double __DEFAULT_FN_ATTRS 10249314564Sdim_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) { 10250314564Sdim _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()), 10251314564Sdim min_pd, d, f, pd, __M); 10252314564Sdim} 10253314564Sdim 10254314564Sdim// Vec512 - Vector with size 512. 
10255314564Sdim// IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example: 10256314564Sdim// __mm512_max_epi32 10257314564Sdim// T1 - Can get 'i' for int and ' ' .[__m512{i|}] 10258314564Sdim// T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}] 10259314564Sdim 10260314564Sdim#define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) __extension__({ \ 10261314564Sdim Vec512 = _mm512_##IntrinName( \ 10262314564Sdim (__m512##T1)__builtin_shufflevector( \ 10263314564Sdim (__v16s##T2)Vec512, \ 10264314564Sdim (__v16s##T2)Vec512, \ 10265314564Sdim 0, 1, 2, 3, 4, 5, 6, 7, \ 10266314564Sdim -1, -1, -1, -1, -1, -1, -1, -1), \ 10267314564Sdim (__m512##T1)__builtin_shufflevector( \ 10268314564Sdim (__v16s##T2)Vec512, \ 10269314564Sdim (__v16s##T2)Vec512, \ 10270314564Sdim 8, 9, 10, 11, 12, 13, 14, 15, \ 10271314564Sdim -1, -1, -1, -1, -1, -1, -1, -1)); \ 10272314564Sdim Vec512 = _mm512_##IntrinName( \ 10273314564Sdim (__m512##T1)__builtin_shufflevector( \ 10274314564Sdim (__v16s##T2)Vec512, \ 10275314564Sdim (__v16s##T2)Vec512, \ 10276314564Sdim 0, 1, 2, 3, -1, -1, -1, -1, \ 10277314564Sdim -1, -1, -1, -1, -1, -1, -1, -1), \ 10278314564Sdim (__m512##T1)__builtin_shufflevector( \ 10279314564Sdim (__v16s##T2)Vec512, \ 10280314564Sdim (__v16s##T2)Vec512, \ 10281314564Sdim 4, 5, 6, 7, -1, -1, -1, -1, \ 10282314564Sdim -1, -1, -1, -1, -1, -1, -1, -1)); \ 10283314564Sdim Vec512 = _mm512_##IntrinName( \ 10284314564Sdim (__m512##T1)__builtin_shufflevector( \ 10285314564Sdim (__v16s##T2)Vec512, \ 10286314564Sdim (__v16s##T2)Vec512, \ 10287314564Sdim 0, 1, -1, -1, -1, -1, -1, -1, \ 10288314564Sdim -1, -1, -1, -1, -1, -1, -1, -1), \ 10289314564Sdim (__m512##T1)__builtin_shufflevector( \ 10290314564Sdim (__v16s##T2)Vec512, \ 10291314564Sdim (__v16s##T2)Vec512, \ 10292314564Sdim 2, 3, -1, -1, -1, -1, -1, -1, \ 10293314564Sdim -1, -1, -1, -1, -1, -1, -1, -1)); \ 10294314564Sdim Vec512 = _mm512_##IntrinName( \ 10295314564Sdim 
(__m512##T1)__builtin_shufflevector( \ 10296314564Sdim (__v16s##T2)Vec512, \ 10297314564Sdim (__v16s##T2)Vec512, \ 10298314564Sdim 0, -1, -1, -1, -1, -1, -1, -1, \ 10299314564Sdim -1, -1, -1, -1, -1, -1, -1, -1), \ 10300314564Sdim (__m512##T1)__builtin_shufflevector( \ 10301314564Sdim (__v16s##T2)Vec512, \ 10302314564Sdim (__v16s##T2)Vec512, \ 10303314564Sdim 1, -1, -1, -1, -1, -1, -1, -1, \ 10304314564Sdim -1, -1, -1, -1, -1, -1, -1, -1)); \ 10305314564Sdim return Vec512[0]; \ 10306314564Sdim }) 10307314564Sdim 10308314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a) { 10309314564Sdim _mm512_reduce_maxMin_32bit(a, max_epi32, i, i); 10310314564Sdim} 10311314564Sdim 10312314564Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 10313314564Sdim_mm512_reduce_max_epu32(__m512i a) { 10314314564Sdim _mm512_reduce_maxMin_32bit(a, max_epu32, i, i); 10315314564Sdim} 10316314564Sdim 10317314564Sdimstatic __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a) { 10318314564Sdim _mm512_reduce_maxMin_32bit(a, max_ps, , f); 10319314564Sdim} 10320314564Sdim 10321314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a) { 10322314564Sdim _mm512_reduce_maxMin_32bit(a, min_epi32, i, i); 10323314564Sdim} 10324314564Sdim 10325314564Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 10326314564Sdim_mm512_reduce_min_epu32(__m512i a) { 10327314564Sdim _mm512_reduce_maxMin_32bit(a, min_epu32, i, i); 10328314564Sdim} 10329314564Sdim 10330314564Sdimstatic __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a) { 10331314564Sdim _mm512_reduce_maxMin_32bit(a, min_ps, , f); 10332314564Sdim} 10333314564Sdim 10334314564Sdim// Vec512 - Vector with size 512. 
10335314564Sdim// Vec512Neutral - A 512 length vector with elements set to the identity element 10336314564Sdim// Identity element: {max_epi,0x80000000} 10337314564Sdim// {max_epu,0x00000000} 10338314564Sdim// {max_ps, 0xFF800000} 10339314564Sdim// {min_epi,0x7FFFFFFF} 10340314564Sdim// {min_epu,0xFFFFFFFF} 10341314564Sdim// {min_ps, 0x7F800000} 10342314564Sdim// 10343314564Sdim// IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example: 10344314564Sdim// __mm512_max_epi32 10345314564Sdim// T1 - Can get 'i' for int and ' ' .[__m512{i|}] 10346314564Sdim// T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}] 10347314564Sdim// T3 - Can get 'q' q word and 'pd' for packed double. 10348314564Sdim// [__builtin_ia32_select{q|pd}_512] 10349314564Sdim// Mask - Intrinsic Mask 10350314564Sdim 10351314564Sdim#define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \ 10352314564Sdim T2, T3, Mask) \ 10353314564Sdim __extension__({ \ 10354314564Sdim Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \ 10355314564Sdim (__mmask16)Mask, \ 10356314564Sdim (__v16s##T2)Vec512, \ 10357314564Sdim (__v16s##T2)Vec512Neutral); \ 10358314564Sdim _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2); \ 10359314564Sdim }) 10360314564Sdim 10361314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 10362314564Sdim_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) { 10363314564Sdim _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32, 10364314564Sdim i, i, d, __M); 10365314564Sdim} 10366314564Sdim 10367314564Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 10368314564Sdim_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) { 10369314564Sdim _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32, 10370314564Sdim i, i, d, __M); 10371314564Sdim} 10372314564Sdim 10373314564Sdimstatic __inline__ float __DEFAULT_FN_ATTRS 10374314564Sdim_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) { 
10375314564Sdim _mm512_mask_reduce_maxMin_32bit(__V,-_mm512_set1_ps(__builtin_inff()), max_ps, , f, 10376314564Sdim ps, __M); 10377314564Sdim} 10378314564Sdim 10379314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 10380314564Sdim_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) { 10381314564Sdim _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32, 10382314564Sdim i, i, d, __M); 10383314564Sdim} 10384314564Sdim 10385314564Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 10386314564Sdim_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) { 10387314564Sdim _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32, 10388314564Sdim i, i, d, __M); 10389314564Sdim} 10390314564Sdim 10391314564Sdimstatic __inline__ float __DEFAULT_FN_ATTRS 10392314564Sdim_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) { 10393314564Sdim _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), min_ps, , f, 10394314564Sdim ps, __M); 10395314564Sdim} 10396314564Sdim 10397288943Sdim#undef __DEFAULT_FN_ATTRS 10398288943Sdim 10399277325Sdim#endif // __AVX512FINTRIN_H 10400