/* avx512fintrin.h revision 314564 */
1296417Sdim/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------=== 2277325Sdim * 3277325Sdim * Permission is hereby granted, free of charge, to any person obtaining a copy 4277325Sdim * of this software and associated documentation files (the "Software"), to deal 5277325Sdim * in the Software without restriction, including without limitation the rights 6277325Sdim * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7277325Sdim * copies of the Software, and to permit persons to whom the Software is 8277325Sdim * furnished to do so, subject to the following conditions: 9277325Sdim * 10277325Sdim * The above copyright notice and this permission notice shall be included in 11277325Sdim * all copies or substantial portions of the Software. 12277325Sdim * 13277325Sdim * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14277325Sdim * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15277325Sdim * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16277325Sdim * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17277325Sdim * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18277325Sdim * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19277325Sdim * THE SOFTWARE. 20277325Sdim * 21277325Sdim *===-----------------------------------------------------------------------=== 22277325Sdim */ 23277325Sdim#ifndef __IMMINTRIN_H 24277325Sdim#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." 
25277325Sdim#endif 26277325Sdim 27277325Sdim#ifndef __AVX512FINTRIN_H 28277325Sdim#define __AVX512FINTRIN_H 29277325Sdim 30309124Sdimtypedef char __v64qi __attribute__((__vector_size__(64))); 31309124Sdimtypedef short __v32hi __attribute__((__vector_size__(64))); 32277325Sdimtypedef double __v8df __attribute__((__vector_size__(64))); 33277325Sdimtypedef float __v16sf __attribute__((__vector_size__(64))); 34277325Sdimtypedef long long __v8di __attribute__((__vector_size__(64))); 35277325Sdimtypedef int __v16si __attribute__((__vector_size__(64))); 36277325Sdim 37309124Sdim/* Unsigned types */ 38309124Sdimtypedef unsigned char __v64qu __attribute__((__vector_size__(64))); 39309124Sdimtypedef unsigned short __v32hu __attribute__((__vector_size__(64))); 40309124Sdimtypedef unsigned long long __v8du __attribute__((__vector_size__(64))); 41309124Sdimtypedef unsigned int __v16su __attribute__((__vector_size__(64))); 42309124Sdim 43277325Sdimtypedef float __m512 __attribute__((__vector_size__(64))); 44277325Sdimtypedef double __m512d __attribute__((__vector_size__(64))); 45277325Sdimtypedef long long __m512i __attribute__((__vector_size__(64))); 46277325Sdim 47277325Sdimtypedef unsigned char __mmask8; 48277325Sdimtypedef unsigned short __mmask16; 49277325Sdim 50277325Sdim/* Rounding mode macros. 
*/ 51277325Sdim#define _MM_FROUND_TO_NEAREST_INT 0x00 52277325Sdim#define _MM_FROUND_TO_NEG_INF 0x01 53277325Sdim#define _MM_FROUND_TO_POS_INF 0x02 54277325Sdim#define _MM_FROUND_TO_ZERO 0x03 55277325Sdim#define _MM_FROUND_CUR_DIRECTION 0x04 56277325Sdim 57314564Sdim/* Constants for integer comparison predicates */ 58314564Sdimtypedef enum { 59314564Sdim _MM_CMPINT_EQ, /* Equal */ 60314564Sdim _MM_CMPINT_LT, /* Less than */ 61314564Sdim _MM_CMPINT_LE, /* Less than or Equal */ 62314564Sdim _MM_CMPINT_UNUSED, 63314564Sdim _MM_CMPINT_NE, /* Not Equal */ 64314564Sdim _MM_CMPINT_NLT, /* Not Less than */ 65314564Sdim#define _MM_CMPINT_GE _MM_CMPINT_NLT /* Greater than or Equal */ 66314564Sdim _MM_CMPINT_NLE /* Not Less than or Equal */ 67314564Sdim#define _MM_CMPINT_GT _MM_CMPINT_NLE /* Greater than */ 68314564Sdim} _MM_CMPINT_ENUM; 69314564Sdim 70309124Sdimtypedef enum 71309124Sdim{ 72309124Sdim _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, 73309124Sdim _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, 74309124Sdim _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08, 75309124Sdim _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B, 76309124Sdim _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, 77309124Sdim _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, 78309124Sdim _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14, 79309124Sdim _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17, 80309124Sdim _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, 81309124Sdim _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, 82309124Sdim _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20, 83309124Sdim _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23, 84309124Sdim _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, 85309124Sdim _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, 
86309124Sdim _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C, 87309124Sdim _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F, 88309124Sdim _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, 89309124Sdim _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, 90309124Sdim _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38, 91309124Sdim _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B, 92309124Sdim _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, 93309124Sdim _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, 94309124Sdim _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44, 95309124Sdim _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47, 96309124Sdim _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, 97309124Sdim _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, 98309124Sdim _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50, 99309124Sdim _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53, 100309124Sdim _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, 101309124Sdim _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, 102309124Sdim _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C, 103309124Sdim _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F, 104309124Sdim _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, 105309124Sdim _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, 106309124Sdim _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68, 107309124Sdim _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B, 108309124Sdim _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, 109309124Sdim _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, 110309124Sdim _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74, 111309124Sdim 
_MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77, 112309124Sdim _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, 113309124Sdim _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, 114309124Sdim _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80, 115309124Sdim _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83, 116309124Sdim _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, 117309124Sdim _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, 118309124Sdim _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C, 119309124Sdim _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F, 120309124Sdim _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, 121309124Sdim _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, 122309124Sdim _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98, 123309124Sdim _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B, 124309124Sdim _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, 125309124Sdim _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, 126309124Sdim _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4, 127309124Sdim _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7, 128309124Sdim _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, 129309124Sdim _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, 130309124Sdim _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0, 131309124Sdim _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3, 132309124Sdim _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, 133309124Sdim _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, 134309124Sdim _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC, 135309124Sdim _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF, 136309124Sdim 
_MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, 137309124Sdim _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, 138309124Sdim _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8, 139309124Sdim _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB, 140309124Sdim _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, 141309124Sdim _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, 142309124Sdim _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4, 143309124Sdim _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7, 144309124Sdim _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, 145309124Sdim _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, 146309124Sdim _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0, 147309124Sdim _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3, 148309124Sdim _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, 149309124Sdim _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, 150309124Sdim _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC, 151309124Sdim _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF, 152309124Sdim _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, 153309124Sdim _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, 154309124Sdim _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8, 155309124Sdim _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB, 156309124Sdim _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, 157309124Sdim _MM_PERM_DDDD = 0xFF 158309124Sdim} _MM_PERM_ENUM; 159309124Sdim 160309124Sdimtypedef enum 161309124Sdim{ 162309124Sdim _MM_MANT_NORM_1_2, /* interval [1, 2) */ 163309124Sdim _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */ 164309124Sdim _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */ 165309124Sdim _MM_MANT_NORM_p75_1p5 /* 
interval [0.75, 1.5) */ 166309124Sdim} _MM_MANTISSA_NORM_ENUM; 167309124Sdim 168309124Sdimtypedef enum 169309124Sdim{ 170309124Sdim _MM_MANT_SIGN_src, /* sign = sign(SRC) */ 171309124Sdim _MM_MANT_SIGN_zero, /* sign = 0 */ 172309124Sdim _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */ 173309124Sdim} _MM_MANTISSA_SIGN_ENUM; 174309124Sdim 175288943Sdim/* Define the default attributes for the functions in this file. */ 176296417Sdim#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) 177288943Sdim 178277325Sdim/* Create vectors with repeated elements */ 179277325Sdim 180288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 181277325Sdim_mm512_setzero_si512(void) 182277325Sdim{ 183277325Sdim return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; 184277325Sdim} 185277325Sdim 186309124Sdim#define _mm512_setzero_epi32 _mm512_setzero_si512 187309124Sdim 188296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 189309124Sdim_mm512_undefined_pd(void) 190296417Sdim{ 191296417Sdim return (__m512d)__builtin_ia32_undef512(); 192296417Sdim} 193296417Sdim 194296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 195309124Sdim_mm512_undefined(void) 196296417Sdim{ 197296417Sdim return (__m512)__builtin_ia32_undef512(); 198296417Sdim} 199296417Sdim 200296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 201309124Sdim_mm512_undefined_ps(void) 202296417Sdim{ 203296417Sdim return (__m512)__builtin_ia32_undef512(); 204296417Sdim} 205296417Sdim 206296417Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 207309124Sdim_mm512_undefined_epi32(void) 208296417Sdim{ 209296417Sdim return (__m512i)__builtin_ia32_undef512(); 210296417Sdim} 211296417Sdim 212309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 213309124Sdim_mm512_broadcastd_epi32 (__m128i __A) 214309124Sdim{ 215309124Sdim return (__m512i)__builtin_shufflevector((__v4si) __A, 216309124Sdim (__v4si)_mm_undefined_si128(), 217309124Sdim 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 
218309124Sdim} 219309124Sdim 220309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 221309124Sdim_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) 222309124Sdim{ 223309124Sdim return (__m512i)__builtin_ia32_selectd_512(__M, 224309124Sdim (__v16si) _mm512_broadcastd_epi32(__A), 225309124Sdim (__v16si) __O); 226309124Sdim} 227309124Sdim 228309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 229309124Sdim_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) 230309124Sdim{ 231309124Sdim return (__m512i)__builtin_ia32_selectd_512(__M, 232309124Sdim (__v16si) _mm512_broadcastd_epi32(__A), 233309124Sdim (__v16si) _mm512_setzero_si512()); 234309124Sdim} 235309124Sdim 236309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 237309124Sdim_mm512_broadcastq_epi64 (__m128i __A) 238309124Sdim{ 239309124Sdim return (__m512i)__builtin_shufflevector((__v2di) __A, 240309124Sdim (__v2di) _mm_undefined_si128(), 241309124Sdim 0, 0, 0, 0, 0, 0, 0, 0); 242309124Sdim} 243309124Sdim 244309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 245309124Sdim_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) 246309124Sdim{ 247309124Sdim return (__m512i)__builtin_ia32_selectq_512(__M, 248309124Sdim (__v8di) _mm512_broadcastq_epi64(__A), 249309124Sdim (__v8di) __O); 250309124Sdim 251309124Sdim} 252309124Sdim 253309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 254309124Sdim_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 255309124Sdim{ 256309124Sdim return (__m512i)__builtin_ia32_selectq_512(__M, 257309124Sdim (__v8di) _mm512_broadcastq_epi64(__A), 258309124Sdim (__v8di) _mm512_setzero_si512()); 259309124Sdim} 260309124Sdim 261288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 262277325Sdim_mm512_maskz_set1_epi32(__mmask16 __M, int __A) 263277325Sdim{ 264277325Sdim return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, 265277325Sdim (__v16si) 266277325Sdim _mm512_setzero_si512 (), 267277325Sdim __M); 268277325Sdim} 
269277325Sdim 270288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 271277325Sdim_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) 272277325Sdim{ 273277325Sdim#ifdef __x86_64__ 274277325Sdim return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, 275277325Sdim (__v8di) 276277325Sdim _mm512_setzero_si512 (), 277277325Sdim __M); 278277325Sdim#else 279277325Sdim return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, 280277325Sdim (__v8di) 281277325Sdim _mm512_setzero_si512 (), 282277325Sdim __M); 283277325Sdim#endif 284277325Sdim} 285277325Sdim 286288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 287277325Sdim_mm512_setzero_ps(void) 288277325Sdim{ 289277325Sdim return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 290277325Sdim 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 291277325Sdim} 292309124Sdim 293309124Sdim#define _mm512_setzero _mm512_setzero_ps 294309124Sdim 295288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 296277325Sdim_mm512_setzero_pd(void) 297277325Sdim{ 298277325Sdim return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 299277325Sdim} 300277325Sdim 301288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 302277325Sdim_mm512_set1_ps(float __w) 303277325Sdim{ 304277325Sdim return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, 305277325Sdim __w, __w, __w, __w, __w, __w, __w, __w }; 306277325Sdim} 307277325Sdim 308288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 309277325Sdim_mm512_set1_pd(double __w) 310277325Sdim{ 311277325Sdim return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; 312277325Sdim} 313277325Sdim 314288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 315309124Sdim_mm512_set1_epi8(char __w) 316309124Sdim{ 317309124Sdim return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w, 318309124Sdim __w, __w, __w, __w, __w, __w, __w, __w, 319309124Sdim __w, __w, __w, __w, __w, __w, __w, __w, 320309124Sdim __w, __w, __w, __w, __w, __w, __w, __w, 321309124Sdim __w, __w, __w, __w, __w, __w, __w, __w, 
322309124Sdim __w, __w, __w, __w, __w, __w, __w, __w, 323309124Sdim __w, __w, __w, __w, __w, __w, __w, __w, 324309124Sdim __w, __w, __w, __w, __w, __w, __w, __w }; 325309124Sdim} 326309124Sdim 327309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 328309124Sdim_mm512_set1_epi16(short __w) 329309124Sdim{ 330309124Sdim return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w, 331309124Sdim __w, __w, __w, __w, __w, __w, __w, __w, 332309124Sdim __w, __w, __w, __w, __w, __w, __w, __w, 333309124Sdim __w, __w, __w, __w, __w, __w, __w, __w }; 334309124Sdim} 335309124Sdim 336309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 337277325Sdim_mm512_set1_epi32(int __s) 338277325Sdim{ 339277325Sdim return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s, 340277325Sdim __s, __s, __s, __s, __s, __s, __s, __s }; 341277325Sdim} 342277325Sdim 343288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 344277325Sdim_mm512_set1_epi64(long long __d) 345277325Sdim{ 346277325Sdim return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; 347277325Sdim} 348277325Sdim 349288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 350309124Sdim_mm512_broadcastss_ps(__m128 __A) 351277325Sdim{ 352309124Sdim return (__m512)__builtin_shufflevector((__v4sf) __A, 353309124Sdim (__v4sf)_mm_undefined_ps(), 354309124Sdim 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 355277325Sdim} 356277325Sdim 357309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 358309124Sdim_mm512_set4_epi32 (int __A, int __B, int __C, int __D) 359309124Sdim{ 360309124Sdim return (__m512i)(__v16si) 361309124Sdim { __D, __C, __B, __A, __D, __C, __B, __A, 362309124Sdim __D, __C, __B, __A, __D, __C, __B, __A }; 363309124Sdim} 364309124Sdim 365309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 366309124Sdim_mm512_set4_epi64 (long long __A, long long __B, long long __C, 367309124Sdim long long __D) 368309124Sdim{ 369309124Sdim return (__m512i) (__v8di) 370309124Sdim { __D, __C, __B, __A, __D, __C, __B, __A }; 
371309124Sdim} 372309124Sdim 373309124Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 374309124Sdim_mm512_set4_pd (double __A, double __B, double __C, double __D) 375309124Sdim{ 376309124Sdim return (__m512d) 377309124Sdim { __D, __C, __B, __A, __D, __C, __B, __A }; 378309124Sdim} 379309124Sdim 380309124Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 381309124Sdim_mm512_set4_ps (float __A, float __B, float __C, float __D) 382309124Sdim{ 383309124Sdim return (__m512) 384309124Sdim { __D, __C, __B, __A, __D, __C, __B, __A, 385309124Sdim __D, __C, __B, __A, __D, __C, __B, __A }; 386309124Sdim} 387309124Sdim 388309124Sdim#define _mm512_setr4_epi32(e0,e1,e2,e3) \ 389309124Sdim _mm512_set4_epi32((e3),(e2),(e1),(e0)) 390309124Sdim 391309124Sdim#define _mm512_setr4_epi64(e0,e1,e2,e3) \ 392309124Sdim _mm512_set4_epi64((e3),(e2),(e1),(e0)) 393309124Sdim 394309124Sdim#define _mm512_setr4_pd(e0,e1,e2,e3) \ 395309124Sdim _mm512_set4_pd((e3),(e2),(e1),(e0)) 396309124Sdim 397309124Sdim#define _mm512_setr4_ps(e0,e1,e2,e3) \ 398309124Sdim _mm512_set4_ps((e3),(e2),(e1),(e0)) 399309124Sdim 400288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 401309124Sdim_mm512_broadcastsd_pd(__m128d __A) 402277325Sdim{ 403309124Sdim return (__m512d)__builtin_shufflevector((__v2df) __A, 404309124Sdim (__v2df) _mm_undefined_pd(), 405309124Sdim 0, 0, 0, 0, 0, 0, 0, 0); 406277325Sdim} 407277325Sdim 408277325Sdim/* Cast between vector types */ 409277325Sdim 410288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 411277325Sdim_mm512_castpd256_pd512(__m256d __a) 412277325Sdim{ 413277325Sdim return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); 414277325Sdim} 415277325Sdim 416288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 417277325Sdim_mm512_castps256_ps512(__m256 __a) 418277325Sdim{ 419277325Sdim return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 420277325Sdim -1, -1, -1, -1, -1, -1, -1, -1); 421277325Sdim} 422277325Sdim 423288943Sdimstatic __inline __m128d 
__DEFAULT_FN_ATTRS 424277325Sdim_mm512_castpd512_pd128(__m512d __a) 425277325Sdim{ 426277325Sdim return __builtin_shufflevector(__a, __a, 0, 1); 427277325Sdim} 428277325Sdim 429309124Sdimstatic __inline __m256d __DEFAULT_FN_ATTRS 430309124Sdim_mm512_castpd512_pd256 (__m512d __A) 431309124Sdim{ 432309124Sdim return __builtin_shufflevector(__A, __A, 0, 1, 2, 3); 433309124Sdim} 434309124Sdim 435288943Sdimstatic __inline __m128 __DEFAULT_FN_ATTRS 436277325Sdim_mm512_castps512_ps128(__m512 __a) 437277325Sdim{ 438277325Sdim return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); 439277325Sdim} 440277325Sdim 441309124Sdimstatic __inline __m256 __DEFAULT_FN_ATTRS 442309124Sdim_mm512_castps512_ps256 (__m512 __A) 443309124Sdim{ 444309124Sdim return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7); 445309124Sdim} 446309124Sdim 447309124Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 448309124Sdim_mm512_castpd_ps (__m512d __A) 449309124Sdim{ 450309124Sdim return (__m512) (__A); 451309124Sdim} 452309124Sdim 453309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 454309124Sdim_mm512_castpd_si512 (__m512d __A) 455309124Sdim{ 456309124Sdim return (__m512i) (__A); 457309124Sdim} 458309124Sdim 459309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 460309124Sdim_mm512_castpd128_pd512 (__m128d __A) 461309124Sdim{ 462309124Sdim return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); 463309124Sdim} 464309124Sdim 465309124Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 466309124Sdim_mm512_castps_pd (__m512 __A) 467309124Sdim{ 468309124Sdim return (__m512d) (__A); 469309124Sdim} 470309124Sdim 471309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 472309124Sdim_mm512_castps_si512 (__m512 __A) 473309124Sdim{ 474309124Sdim return (__m512i) (__A); 475309124Sdim} 476309124Sdim 477309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 478309124Sdim_mm512_castps128_ps512 (__m128 __A) 479309124Sdim{ 480309124Sdim return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); 481309124Sdim} 482309124Sdim 483309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 484309124Sdim_mm512_castsi128_si512 (__m128i __A) 485309124Sdim{ 486309124Sdim return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); 487309124Sdim} 488309124Sdim 489309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 490309124Sdim_mm512_castsi256_si512 (__m256i __A) 491309124Sdim{ 492309124Sdim return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1); 493309124Sdim} 494309124Sdim 495309124Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 496309124Sdim_mm512_castsi512_ps (__m512i __A) 497309124Sdim{ 498309124Sdim return (__m512) (__A); 499309124Sdim} 500309124Sdim 501309124Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 502309124Sdim_mm512_castsi512_pd (__m512i __A) 503309124Sdim{ 504309124Sdim return (__m512d) (__A); 505309124Sdim} 506309124Sdim 507309124Sdimstatic __inline __m128i __DEFAULT_FN_ATTRS 508309124Sdim_mm512_castsi512_si128 (__m512i __A) 509309124Sdim{ 510309124Sdim return (__m128i)__builtin_shufflevector(__A, __A , 0, 1); 511309124Sdim} 512309124Sdim 513309124Sdimstatic __inline __m256i __DEFAULT_FN_ATTRS 514309124Sdim_mm512_castsi512_si256 (__m512i __A) 515309124Sdim{ 516309124Sdim return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3); 517309124Sdim} 518309124Sdim 519314564Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 520314564Sdim_mm512_int2mask(int __a) 521314564Sdim{ 522314564Sdim return (__mmask16)__a; 523314564Sdim} 524314564Sdim 525314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 526314564Sdim_mm512_mask2int(__mmask16 __a) 527314564Sdim{ 528314564Sdim return (int)__a; 529314564Sdim} 530314564Sdim 531288943Sdim/* Bitwise operators */ 532288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 533288943Sdim_mm512_and_epi32(__m512i __a, __m512i __b) 534288943Sdim{ 535309124Sdim return (__m512i)((__v16su)__a & (__v16su)__b); 536288943Sdim} 537288943Sdim 538288943Sdimstatic 
__inline__ __m512i __DEFAULT_FN_ATTRS 539288943Sdim_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 540288943Sdim{ 541309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, 542309124Sdim (__v16si) _mm512_and_epi32(__a, __b), 543309124Sdim (__v16si) __src); 544288943Sdim} 545309124Sdim 546288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 547288943Sdim_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) 548288943Sdim{ 549309124Sdim return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (), 550309124Sdim __k, __a, __b); 551288943Sdim} 552288943Sdim 553288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 554288943Sdim_mm512_and_epi64(__m512i __a, __m512i __b) 555288943Sdim{ 556309124Sdim return (__m512i)((__v8du)__a & (__v8du)__b); 557288943Sdim} 558288943Sdim 559288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 560288943Sdim_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 561288943Sdim{ 562309124Sdim return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k, 563309124Sdim (__v8di) _mm512_and_epi64(__a, __b), 564309124Sdim (__v8di) __src); 565288943Sdim} 566309124Sdim 567288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 568288943Sdim_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) 569288943Sdim{ 570309124Sdim return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (), 571309124Sdim __k, __a, __b); 572288943Sdim} 573288943Sdim 574288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 575309124Sdim_mm512_andnot_si512 (__m512i __A, __m512i __B) 576309124Sdim{ 577309124Sdim return (__m512i)(~(__v8du)(__A) & (__v8du)__B); 578309124Sdim} 579309124Sdim 580309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 581288943Sdim_mm512_andnot_epi32 (__m512i __A, __m512i __B) 582288943Sdim{ 583309124Sdim return (__m512i)(~(__v16su)(__A) & (__v16su)__B); 584288943Sdim} 585288943Sdim 586288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 
587309124Sdim_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 588288943Sdim{ 589309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 590309124Sdim (__v16si)_mm512_andnot_epi32(__A, __B), 591309124Sdim (__v16si)__W); 592288943Sdim} 593288943Sdim 594288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 595309124Sdim_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B) 596288943Sdim{ 597309124Sdim return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(), 598309124Sdim __U, __A, __B); 599288943Sdim} 600288943Sdim 601288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 602309124Sdim_mm512_andnot_epi64(__m512i __A, __m512i __B) 603288943Sdim{ 604309124Sdim return (__m512i)(~(__v8du)(__A) & (__v8du)__B); 605288943Sdim} 606288943Sdim 607288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 608309124Sdim_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 609288943Sdim{ 610309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 611309124Sdim (__v8di)_mm512_andnot_epi64(__A, __B), 612309124Sdim (__v8di)__W); 613288943Sdim} 614288943Sdim 615288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 616309124Sdim_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B) 617288943Sdim{ 618309124Sdim return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(), 619309124Sdim __U, __A, __B); 620288943Sdim} 621309124Sdim 622288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 623288943Sdim_mm512_or_epi32(__m512i __a, __m512i __b) 624288943Sdim{ 625309124Sdim return (__m512i)((__v16su)__a | (__v16su)__b); 626288943Sdim} 627288943Sdim 628288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 629288943Sdim_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 630288943Sdim{ 631309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, 632309124Sdim (__v16si)_mm512_or_epi32(__a, __b), 633309124Sdim (__v16si)__src); 634288943Sdim} 
635309124Sdim 636288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 637288943Sdim_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) 638288943Sdim{ 639309124Sdim return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b); 640288943Sdim} 641288943Sdim 642288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 643288943Sdim_mm512_or_epi64(__m512i __a, __m512i __b) 644288943Sdim{ 645309124Sdim return (__m512i)((__v8du)__a | (__v8du)__b); 646288943Sdim} 647288943Sdim 648288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 649288943Sdim_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 650288943Sdim{ 651309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k, 652309124Sdim (__v8di)_mm512_or_epi64(__a, __b), 653309124Sdim (__v8di)__src); 654288943Sdim} 655309124Sdim 656288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 657288943Sdim_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) 658288943Sdim{ 659309124Sdim return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b); 660288943Sdim} 661288943Sdim 662288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 663288943Sdim_mm512_xor_epi32(__m512i __a, __m512i __b) 664288943Sdim{ 665309124Sdim return (__m512i)((__v16su)__a ^ (__v16su)__b); 666288943Sdim} 667288943Sdim 668288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 669288943Sdim_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 670288943Sdim{ 671309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, 672309124Sdim (__v16si)_mm512_xor_epi32(__a, __b), 673309124Sdim (__v16si)__src); 674288943Sdim} 675309124Sdim 676288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 677288943Sdim_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) 678288943Sdim{ 679309124Sdim return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b); 680288943Sdim} 681288943Sdim 682288943Sdimstatic __inline__ __m512i 
__DEFAULT_FN_ATTRS 683288943Sdim_mm512_xor_epi64(__m512i __a, __m512i __b) 684288943Sdim{ 685309124Sdim return (__m512i)((__v8du)__a ^ (__v8du)__b); 686288943Sdim} 687288943Sdim 688288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 689288943Sdim_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 690288943Sdim{ 691309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k, 692309124Sdim (__v8di)_mm512_xor_epi64(__a, __b), 693309124Sdim (__v8di)__src); 694288943Sdim} 695309124Sdim 696288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 697288943Sdim_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) 698288943Sdim{ 699309124Sdim return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b); 700288943Sdim} 701288943Sdim 702288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 703288943Sdim_mm512_and_si512(__m512i __a, __m512i __b) 704288943Sdim{ 705309124Sdim return (__m512i)((__v8du)__a & (__v8du)__b); 706288943Sdim} 707288943Sdim 708288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 709288943Sdim_mm512_or_si512(__m512i __a, __m512i __b) 710288943Sdim{ 711309124Sdim return (__m512i)((__v8du)__a | (__v8du)__b); 712288943Sdim} 713288943Sdim 714288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 715288943Sdim_mm512_xor_si512(__m512i __a, __m512i __b) 716288943Sdim{ 717309124Sdim return (__m512i)((__v8du)__a ^ (__v8du)__b); 718288943Sdim} 719309124Sdim 720277325Sdim/* Arithmetic */ 721277325Sdim 722288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 723277325Sdim_mm512_add_pd(__m512d __a, __m512d __b) 724277325Sdim{ 725309124Sdim return (__m512d)((__v8df)__a + (__v8df)__b); 726277325Sdim} 727277325Sdim 728288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 729277325Sdim_mm512_add_ps(__m512 __a, __m512 __b) 730277325Sdim{ 731309124Sdim return (__m512)((__v16sf)__a + (__v16sf)__b); 732277325Sdim} 733277325Sdim 734288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 735277325Sdim_mm512_mul_pd(__m512d 
__a, __m512d __b) 736277325Sdim{ 737309124Sdim return (__m512d)((__v8df)__a * (__v8df)__b); 738277325Sdim} 739277325Sdim 740288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 741277325Sdim_mm512_mul_ps(__m512 __a, __m512 __b) 742277325Sdim{ 743309124Sdim return (__m512)((__v16sf)__a * (__v16sf)__b); 744277325Sdim} 745277325Sdim 746288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 747277325Sdim_mm512_sub_pd(__m512d __a, __m512d __b) 748277325Sdim{ 749309124Sdim return (__m512d)((__v8df)__a - (__v8df)__b); 750277325Sdim} 751277325Sdim 752288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 753277325Sdim_mm512_sub_ps(__m512 __a, __m512 __b) 754277325Sdim{ 755309124Sdim return (__m512)((__v16sf)__a - (__v16sf)__b); 756277325Sdim} 757277325Sdim 758288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 759288943Sdim_mm512_add_epi64 (__m512i __A, __m512i __B) 760288943Sdim{ 761309124Sdim return (__m512i) ((__v8du) __A + (__v8du) __B); 762288943Sdim} 763288943Sdim 764288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 765314564Sdim_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 766288943Sdim{ 767314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 768314564Sdim (__v8di)_mm512_add_epi64(__A, __B), 769314564Sdim (__v8di)__W); 770288943Sdim} 771288943Sdim 772288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 773314564Sdim_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) 774288943Sdim{ 775314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 776314564Sdim (__v8di)_mm512_add_epi64(__A, __B), 777314564Sdim (__v8di)_mm512_setzero_si512()); 778288943Sdim} 779288943Sdim 780288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 781288943Sdim_mm512_sub_epi64 (__m512i __A, __m512i __B) 782288943Sdim{ 783309124Sdim return (__m512i) ((__v8du) __A - (__v8du) __B); 784288943Sdim} 785288943Sdim 786288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 787314564Sdim_mm512_mask_sub_epi64(__m512i __W, __mmask8 
__U, __m512i __A, __m512i __B) 788288943Sdim{ 789314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 790314564Sdim (__v8di)_mm512_sub_epi64(__A, __B), 791314564Sdim (__v8di)__W); 792288943Sdim} 793288943Sdim 794288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 795314564Sdim_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) 796288943Sdim{ 797314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 798314564Sdim (__v8di)_mm512_sub_epi64(__A, __B), 799314564Sdim (__v8di)_mm512_setzero_si512()); 800288943Sdim} 801288943Sdim 802288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 803288943Sdim_mm512_add_epi32 (__m512i __A, __m512i __B) 804288943Sdim{ 805309124Sdim return (__m512i) ((__v16su) __A + (__v16su) __B); 806288943Sdim} 807288943Sdim 808288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 809314564Sdim_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 810288943Sdim{ 811314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 812314564Sdim (__v16si)_mm512_add_epi32(__A, __B), 813314564Sdim (__v16si)__W); 814288943Sdim} 815288943Sdim 816288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 817288943Sdim_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 818288943Sdim{ 819314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 820314564Sdim (__v16si)_mm512_add_epi32(__A, __B), 821314564Sdim (__v16si)_mm512_setzero_si512()); 822288943Sdim} 823288943Sdim 824288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 825288943Sdim_mm512_sub_epi32 (__m512i __A, __m512i __B) 826288943Sdim{ 827309124Sdim return (__m512i) ((__v16su) __A - (__v16su) __B); 828288943Sdim} 829288943Sdim 830288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 831314564Sdim_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 832288943Sdim{ 833314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 834314564Sdim (__v16si)_mm512_sub_epi32(__A, __B), 
835314564Sdim (__v16si)__W); 836288943Sdim} 837288943Sdim 838288943Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 839314564Sdim_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) 840288943Sdim{ 841314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 842314564Sdim (__v16si)_mm512_sub_epi32(__A, __B), 843314564Sdim (__v16si)_mm512_setzero_si512()); 844288943Sdim} 845288943Sdim 846309124Sdim#define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \ 847309124Sdim (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \ 848309124Sdim (__v8df)(__m512d)(B), \ 849309124Sdim (__v8df)(__m512d)(W), (__mmask8)(U), \ 850309124Sdim (int)(R)); }) 851309124Sdim 852309124Sdim#define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \ 853309124Sdim (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \ 854309124Sdim (__v8df)(__m512d)(B), \ 855309124Sdim (__v8df)_mm512_setzero_pd(), \ 856309124Sdim (__mmask8)(U), (int)(R)); }) 857309124Sdim 858309124Sdim#define _mm512_max_round_pd(A, B, R) __extension__ ({ \ 859309124Sdim (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \ 860309124Sdim (__v8df)(__m512d)(B), \ 861309124Sdim (__v8df)_mm512_undefined_pd(), \ 862309124Sdim (__mmask8)-1, (int)(R)); }) 863309124Sdim 864288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 865277325Sdim_mm512_max_pd(__m512d __A, __m512d __B) 866277325Sdim{ 867277325Sdim return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 868277325Sdim (__v8df) __B, 869277325Sdim (__v8df) 870277325Sdim _mm512_setzero_pd (), 871277325Sdim (__mmask8) -1, 872277325Sdim _MM_FROUND_CUR_DIRECTION); 873277325Sdim} 874277325Sdim 875309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 876309124Sdim_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 877309124Sdim{ 878309124Sdim return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 879309124Sdim (__v8df) __B, 880309124Sdim (__v8df) __W, 881309124Sdim (__mmask8) __U, 882309124Sdim 
_MM_FROUND_CUR_DIRECTION); 883309124Sdim} 884309124Sdim 885309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 886309124Sdim_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) 887309124Sdim{ 888309124Sdim return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 889309124Sdim (__v8df) __B, 890309124Sdim (__v8df) 891309124Sdim _mm512_setzero_pd (), 892309124Sdim (__mmask8) __U, 893309124Sdim _MM_FROUND_CUR_DIRECTION); 894309124Sdim} 895309124Sdim 896309124Sdim#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \ 897309124Sdim (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \ 898309124Sdim (__v16sf)(__m512)(B), \ 899309124Sdim (__v16sf)(__m512)(W), (__mmask16)(U), \ 900309124Sdim (int)(R)); }) 901309124Sdim 902309124Sdim#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \ 903309124Sdim (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \ 904309124Sdim (__v16sf)(__m512)(B), \ 905309124Sdim (__v16sf)_mm512_setzero_ps(), \ 906309124Sdim (__mmask16)(U), (int)(R)); }) 907309124Sdim 908309124Sdim#define _mm512_max_round_ps(A, B, R) __extension__ ({ \ 909309124Sdim (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \ 910309124Sdim (__v16sf)(__m512)(B), \ 911309124Sdim (__v16sf)_mm512_undefined_ps(), \ 912309124Sdim (__mmask16)-1, (int)(R)); }) 913309124Sdim 914288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 915277325Sdim_mm512_max_ps(__m512 __A, __m512 __B) 916277325Sdim{ 917277325Sdim return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 918277325Sdim (__v16sf) __B, 919277325Sdim (__v16sf) 920277325Sdim _mm512_setzero_ps (), 921277325Sdim (__mmask16) -1, 922277325Sdim _MM_FROUND_CUR_DIRECTION); 923277325Sdim} 924277325Sdim 925309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 926309124Sdim_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 927309124Sdim{ 928309124Sdim return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 929309124Sdim (__v16sf) __B, 930309124Sdim 
(__v16sf) __W, 931309124Sdim (__mmask16) __U, 932309124Sdim _MM_FROUND_CUR_DIRECTION); 933309124Sdim} 934309124Sdim 935309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 936309124Sdim_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) 937309124Sdim{ 938309124Sdim return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 939309124Sdim (__v16sf) __B, 940309124Sdim (__v16sf) 941309124Sdim _mm512_setzero_ps (), 942309124Sdim (__mmask16) __U, 943309124Sdim _MM_FROUND_CUR_DIRECTION); 944309124Sdim} 945309124Sdim 946296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 947296417Sdim_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 948309124Sdim return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, 949296417Sdim (__v4sf) __B, 950296417Sdim (__v4sf) __W, 951296417Sdim (__mmask8) __U, 952296417Sdim _MM_FROUND_CUR_DIRECTION); 953296417Sdim} 954296417Sdim 955296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 956296417Sdim_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { 957309124Sdim return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, 958296417Sdim (__v4sf) __B, 959296417Sdim (__v4sf) _mm_setzero_ps (), 960296417Sdim (__mmask8) __U, 961296417Sdim _MM_FROUND_CUR_DIRECTION); 962296417Sdim} 963296417Sdim 964309124Sdim#define _mm_max_round_ss(A, B, R) __extension__ ({ \ 965309124Sdim (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 966309124Sdim (__v4sf)(__m128)(B), \ 967309124Sdim (__v4sf)_mm_setzero_ps(), \ 968309124Sdim (__mmask8)-1, (int)(R)); }) 969296417Sdim 970309124Sdim#define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \ 971309124Sdim (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 972309124Sdim (__v4sf)(__m128)(B), \ 973309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 974309124Sdim (int)(R)); }) 975296417Sdim 976309124Sdim#define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \ 977309124Sdim (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 978309124Sdim 
(__v4sf)(__m128)(B), \ 979309124Sdim (__v4sf)_mm_setzero_ps(), \ 980309124Sdim (__mmask8)(U), (int)(R)); }) 981296417Sdim 982296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 983296417Sdim_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 984309124Sdim return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, 985296417Sdim (__v2df) __B, 986296417Sdim (__v2df) __W, 987296417Sdim (__mmask8) __U, 988296417Sdim _MM_FROUND_CUR_DIRECTION); 989296417Sdim} 990296417Sdim 991296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 992296417Sdim_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { 993309124Sdim return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, 994296417Sdim (__v2df) __B, 995296417Sdim (__v2df) _mm_setzero_pd (), 996296417Sdim (__mmask8) __U, 997296417Sdim _MM_FROUND_CUR_DIRECTION); 998296417Sdim} 999296417Sdim 1000309124Sdim#define _mm_max_round_sd(A, B, R) __extension__ ({ \ 1001309124Sdim (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 1002309124Sdim (__v2df)(__m128d)(B), \ 1003309124Sdim (__v2df)_mm_setzero_pd(), \ 1004309124Sdim (__mmask8)-1, (int)(R)); }) 1005296417Sdim 1006309124Sdim#define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \ 1007309124Sdim (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 1008309124Sdim (__v2df)(__m128d)(B), \ 1009309124Sdim (__v2df)(__m128d)(W), \ 1010309124Sdim (__mmask8)(U), (int)(R)); }) 1011296417Sdim 1012309124Sdim#define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \ 1013309124Sdim (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 1014309124Sdim (__v2df)(__m128d)(B), \ 1015309124Sdim (__v2df)_mm_setzero_pd(), \ 1016309124Sdim (__mmask8)(U), (int)(R)); }) 1017296417Sdim 1018277325Sdimstatic __inline __m512i 1019288943Sdim__DEFAULT_FN_ATTRS 1020277325Sdim_mm512_max_epi32(__m512i __A, __m512i __B) 1021277325Sdim{ 1022277325Sdim return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 1023277325Sdim (__v16si) 
__B, 1024277325Sdim (__v16si) 1025277325Sdim _mm512_setzero_si512 (), 1026277325Sdim (__mmask16) -1); 1027277325Sdim} 1028277325Sdim 1029309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1030309124Sdim_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1031309124Sdim{ 1032309124Sdim return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 1033309124Sdim (__v16si) __B, 1034309124Sdim (__v16si) __W, __M); 1035309124Sdim} 1036309124Sdim 1037309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1038309124Sdim_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 1039309124Sdim{ 1040309124Sdim return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 1041309124Sdim (__v16si) __B, 1042309124Sdim (__v16si) 1043309124Sdim _mm512_setzero_si512 (), 1044309124Sdim __M); 1045309124Sdim} 1046309124Sdim 1047288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1048277325Sdim_mm512_max_epu32(__m512i __A, __m512i __B) 1049277325Sdim{ 1050277325Sdim return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, 1051277325Sdim (__v16si) __B, 1052277325Sdim (__v16si) 1053277325Sdim _mm512_setzero_si512 (), 1054277325Sdim (__mmask16) -1); 1055277325Sdim} 1056277325Sdim 1057309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1058309124Sdim_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1059309124Sdim{ 1060309124Sdim return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, 1061309124Sdim (__v16si) __B, 1062309124Sdim (__v16si) __W, __M); 1063309124Sdim} 1064309124Sdim 1065309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1066309124Sdim_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) 1067309124Sdim{ 1068309124Sdim return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, 1069309124Sdim (__v16si) __B, 1070309124Sdim (__v16si) 1071309124Sdim _mm512_setzero_si512 (), 1072309124Sdim __M); 1073309124Sdim} 1074309124Sdim 1075288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 
1076277325Sdim_mm512_max_epi64(__m512i __A, __m512i __B) 1077277325Sdim{ 1078277325Sdim return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 1079277325Sdim (__v8di) __B, 1080277325Sdim (__v8di) 1081277325Sdim _mm512_setzero_si512 (), 1082277325Sdim (__mmask8) -1); 1083277325Sdim} 1084277325Sdim 1085309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1086309124Sdim_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 1087309124Sdim{ 1088309124Sdim return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 1089309124Sdim (__v8di) __B, 1090309124Sdim (__v8di) __W, __M); 1091309124Sdim} 1092309124Sdim 1093309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1094309124Sdim_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) 1095309124Sdim{ 1096309124Sdim return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 1097309124Sdim (__v8di) __B, 1098309124Sdim (__v8di) 1099309124Sdim _mm512_setzero_si512 (), 1100309124Sdim __M); 1101309124Sdim} 1102309124Sdim 1103288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1104277325Sdim_mm512_max_epu64(__m512i __A, __m512i __B) 1105277325Sdim{ 1106277325Sdim return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 1107277325Sdim (__v8di) __B, 1108277325Sdim (__v8di) 1109277325Sdim _mm512_setzero_si512 (), 1110277325Sdim (__mmask8) -1); 1111277325Sdim} 1112277325Sdim 1113309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1114309124Sdim_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 1115309124Sdim{ 1116309124Sdim return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 1117309124Sdim (__v8di) __B, 1118309124Sdim (__v8di) __W, __M); 1119309124Sdim} 1120309124Sdim 1121309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1122309124Sdim_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) 1123309124Sdim{ 1124309124Sdim return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 1125309124Sdim (__v8di) __B, 1126309124Sdim (__v8di) 
1127309124Sdim _mm512_setzero_si512 (), 1128309124Sdim __M); 1129309124Sdim} 1130309124Sdim 1131309124Sdim#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \ 1132309124Sdim (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \ 1133309124Sdim (__v8df)(__m512d)(B), \ 1134309124Sdim (__v8df)(__m512d)(W), (__mmask8)(U), \ 1135309124Sdim (int)(R)); }) 1136309124Sdim 1137309124Sdim#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \ 1138309124Sdim (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \ 1139309124Sdim (__v8df)(__m512d)(B), \ 1140309124Sdim (__v8df)_mm512_setzero_pd(), \ 1141309124Sdim (__mmask8)(U), (int)(R)); }) 1142309124Sdim 1143309124Sdim#define _mm512_min_round_pd(A, B, R) __extension__ ({ \ 1144309124Sdim (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \ 1145309124Sdim (__v8df)(__m512d)(B), \ 1146309124Sdim (__v8df)_mm512_undefined_pd(), \ 1147309124Sdim (__mmask8)-1, (int)(R)); }) 1148309124Sdim 1149288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1150277325Sdim_mm512_min_pd(__m512d __A, __m512d __B) 1151277325Sdim{ 1152277325Sdim return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 1153277325Sdim (__v8df) __B, 1154277325Sdim (__v8df) 1155277325Sdim _mm512_setzero_pd (), 1156277325Sdim (__mmask8) -1, 1157277325Sdim _MM_FROUND_CUR_DIRECTION); 1158277325Sdim} 1159277325Sdim 1160309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1161309124Sdim_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 1162309124Sdim{ 1163309124Sdim return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 1164309124Sdim (__v8df) __B, 1165309124Sdim (__v8df) __W, 1166309124Sdim (__mmask8) __U, 1167309124Sdim _MM_FROUND_CUR_DIRECTION); 1168309124Sdim} 1169309124Sdim 1170309124Sdim#define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \ 1171309124Sdim (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \ 1172309124Sdim (__v16sf)(__m512)(B), \ 1173309124Sdim 
(__v16sf)(__m512)(W), (__mmask16)(U), \ 1174309124Sdim (int)(R)); }) 1175309124Sdim 1176309124Sdim#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \ 1177309124Sdim (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \ 1178309124Sdim (__v16sf)(__m512)(B), \ 1179309124Sdim (__v16sf)_mm512_setzero_ps(), \ 1180309124Sdim (__mmask16)(U), (int)(R)); }) 1181309124Sdim 1182309124Sdim#define _mm512_min_round_ps(A, B, R) __extension__ ({ \ 1183309124Sdim (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \ 1184309124Sdim (__v16sf)(__m512)(B), \ 1185309124Sdim (__v16sf)_mm512_undefined_ps(), \ 1186309124Sdim (__mmask16)-1, (int)(R)); }) 1187309124Sdim 1188309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1189309124Sdim_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) 1190309124Sdim{ 1191309124Sdim return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 1192309124Sdim (__v8df) __B, 1193309124Sdim (__v8df) 1194309124Sdim _mm512_setzero_pd (), 1195309124Sdim (__mmask8) __U, 1196309124Sdim _MM_FROUND_CUR_DIRECTION); 1197309124Sdim} 1198309124Sdim 1199288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1200277325Sdim_mm512_min_ps(__m512 __A, __m512 __B) 1201277325Sdim{ 1202277325Sdim return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 1203277325Sdim (__v16sf) __B, 1204277325Sdim (__v16sf) 1205277325Sdim _mm512_setzero_ps (), 1206277325Sdim (__mmask16) -1, 1207277325Sdim _MM_FROUND_CUR_DIRECTION); 1208277325Sdim} 1209277325Sdim 1210309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1211309124Sdim_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 1212309124Sdim{ 1213309124Sdim return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 1214309124Sdim (__v16sf) __B, 1215309124Sdim (__v16sf) __W, 1216309124Sdim (__mmask16) __U, 1217309124Sdim _MM_FROUND_CUR_DIRECTION); 1218309124Sdim} 1219309124Sdim 1220309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1221309124Sdim_mm512_maskz_min_ps (__mmask16 
__U, __m512 __A, __m512 __B) 1222309124Sdim{ 1223309124Sdim return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 1224309124Sdim (__v16sf) __B, 1225309124Sdim (__v16sf) 1226309124Sdim _mm512_setzero_ps (), 1227309124Sdim (__mmask16) __U, 1228309124Sdim _MM_FROUND_CUR_DIRECTION); 1229309124Sdim} 1230309124Sdim 1231296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1232296417Sdim_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 1233309124Sdim return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, 1234296417Sdim (__v4sf) __B, 1235296417Sdim (__v4sf) __W, 1236296417Sdim (__mmask8) __U, 1237296417Sdim _MM_FROUND_CUR_DIRECTION); 1238296417Sdim} 1239296417Sdim 1240296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1241296417Sdim_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { 1242309124Sdim return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, 1243296417Sdim (__v4sf) __B, 1244296417Sdim (__v4sf) _mm_setzero_ps (), 1245296417Sdim (__mmask8) __U, 1246296417Sdim _MM_FROUND_CUR_DIRECTION); 1247296417Sdim} 1248296417Sdim 1249309124Sdim#define _mm_min_round_ss(A, B, R) __extension__ ({ \ 1250309124Sdim (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1251309124Sdim (__v4sf)(__m128)(B), \ 1252309124Sdim (__v4sf)_mm_setzero_ps(), \ 1253309124Sdim (__mmask8)-1, (int)(R)); }) 1254296417Sdim 1255309124Sdim#define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \ 1256309124Sdim (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1257309124Sdim (__v4sf)(__m128)(B), \ 1258309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 1259309124Sdim (int)(R)); }) 1260296417Sdim 1261309124Sdim#define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \ 1262309124Sdim (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1263309124Sdim (__v4sf)(__m128)(B), \ 1264309124Sdim (__v4sf)_mm_setzero_ps(), \ 1265309124Sdim (__mmask8)(U), (int)(R)); }) 1266296417Sdim 1267296417Sdimstatic __inline__ __m128d 
__DEFAULT_FN_ATTRS 1268296417Sdim_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 1269309124Sdim return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, 1270296417Sdim (__v2df) __B, 1271296417Sdim (__v2df) __W, 1272296417Sdim (__mmask8) __U, 1273296417Sdim _MM_FROUND_CUR_DIRECTION); 1274296417Sdim} 1275296417Sdim 1276296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1277296417Sdim_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) { 1278309124Sdim return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, 1279296417Sdim (__v2df) __B, 1280296417Sdim (__v2df) _mm_setzero_pd (), 1281296417Sdim (__mmask8) __U, 1282296417Sdim _MM_FROUND_CUR_DIRECTION); 1283296417Sdim} 1284296417Sdim 1285309124Sdim#define _mm_min_round_sd(A, B, R) __extension__ ({ \ 1286309124Sdim (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1287309124Sdim (__v2df)(__m128d)(B), \ 1288309124Sdim (__v2df)_mm_setzero_pd(), \ 1289309124Sdim (__mmask8)-1, (int)(R)); }) 1290296417Sdim 1291309124Sdim#define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \ 1292309124Sdim (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1293309124Sdim (__v2df)(__m128d)(B), \ 1294309124Sdim (__v2df)(__m128d)(W), \ 1295309124Sdim (__mmask8)(U), (int)(R)); }) 1296296417Sdim 1297309124Sdim#define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \ 1298309124Sdim (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1299309124Sdim (__v2df)(__m128d)(B), \ 1300309124Sdim (__v2df)_mm_setzero_pd(), \ 1301309124Sdim (__mmask8)(U), (int)(R)); }) 1302296417Sdim 1303277325Sdimstatic __inline __m512i 1304288943Sdim__DEFAULT_FN_ATTRS 1305277325Sdim_mm512_min_epi32(__m512i __A, __m512i __B) 1306277325Sdim{ 1307277325Sdim return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 1308277325Sdim (__v16si) __B, 1309277325Sdim (__v16si) 1310277325Sdim _mm512_setzero_si512 (), 1311277325Sdim (__mmask16) -1); 1312277325Sdim} 1313277325Sdim 
1314309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1315309124Sdim_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1316309124Sdim{ 1317309124Sdim return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 1318309124Sdim (__v16si) __B, 1319309124Sdim (__v16si) __W, __M); 1320309124Sdim} 1321309124Sdim 1322309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1323309124Sdim_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 1324309124Sdim{ 1325309124Sdim return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 1326309124Sdim (__v16si) __B, 1327309124Sdim (__v16si) 1328309124Sdim _mm512_setzero_si512 (), 1329309124Sdim __M); 1330309124Sdim} 1331309124Sdim 1332288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1333277325Sdim_mm512_min_epu32(__m512i __A, __m512i __B) 1334277325Sdim{ 1335277325Sdim return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, 1336277325Sdim (__v16si) __B, 1337277325Sdim (__v16si) 1338277325Sdim _mm512_setzero_si512 (), 1339277325Sdim (__mmask16) -1); 1340277325Sdim} 1341277325Sdim 1342309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1343309124Sdim_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1344309124Sdim{ 1345309124Sdim return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, 1346309124Sdim (__v16si) __B, 1347309124Sdim (__v16si) __W, __M); 1348309124Sdim} 1349309124Sdim 1350309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1351309124Sdim_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) 1352309124Sdim{ 1353309124Sdim return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, 1354309124Sdim (__v16si) __B, 1355309124Sdim (__v16si) 1356309124Sdim _mm512_setzero_si512 (), 1357309124Sdim __M); 1358309124Sdim} 1359309124Sdim 1360288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1361277325Sdim_mm512_min_epi64(__m512i __A, __m512i __B) 1362277325Sdim{ 1363277325Sdim return (__m512i) __builtin_ia32_pminsq512_mask 
((__v8di) __A, 1364277325Sdim (__v8di) __B, 1365277325Sdim (__v8di) 1366277325Sdim _mm512_setzero_si512 (), 1367277325Sdim (__mmask8) -1); 1368277325Sdim} 1369277325Sdim 1370309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1371309124Sdim_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 1372309124Sdim{ 1373309124Sdim return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 1374309124Sdim (__v8di) __B, 1375309124Sdim (__v8di) __W, __M); 1376309124Sdim} 1377309124Sdim 1378309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1379309124Sdim_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) 1380309124Sdim{ 1381309124Sdim return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 1382309124Sdim (__v8di) __B, 1383309124Sdim (__v8di) 1384309124Sdim _mm512_setzero_si512 (), 1385309124Sdim __M); 1386309124Sdim} 1387309124Sdim 1388288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1389277325Sdim_mm512_min_epu64(__m512i __A, __m512i __B) 1390277325Sdim{ 1391277325Sdim return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 1392277325Sdim (__v8di) __B, 1393277325Sdim (__v8di) 1394277325Sdim _mm512_setzero_si512 (), 1395277325Sdim (__mmask8) -1); 1396277325Sdim} 1397277325Sdim 1398309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1399309124Sdim_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 1400309124Sdim{ 1401309124Sdim return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 1402309124Sdim (__v8di) __B, 1403309124Sdim (__v8di) __W, __M); 1404309124Sdim} 1405309124Sdim 1406309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1407309124Sdim_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) 1408309124Sdim{ 1409309124Sdim return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 1410309124Sdim (__v8di) __B, 1411309124Sdim (__v8di) 1412309124Sdim _mm512_setzero_si512 (), 1413309124Sdim __M); 1414309124Sdim} 1415309124Sdim 1416288943Sdimstatic __inline __m512i 
__DEFAULT_FN_ATTRS 1417277325Sdim_mm512_mul_epi32(__m512i __X, __m512i __Y) 1418277325Sdim{ 1419314564Sdim return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y); 1420277325Sdim} 1421277325Sdim 1422288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1423314564Sdim_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 1424288943Sdim{ 1425314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1426314564Sdim (__v8di)_mm512_mul_epi32(__X, __Y), 1427314564Sdim (__v8di)__W); 1428288943Sdim} 1429288943Sdim 1430288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1431314564Sdim_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y) 1432288943Sdim{ 1433314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1434314564Sdim (__v8di)_mm512_mul_epi32(__X, __Y), 1435314564Sdim (__v8di)_mm512_setzero_si512 ()); 1436288943Sdim} 1437288943Sdim 1438288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1439277325Sdim_mm512_mul_epu32(__m512i __X, __m512i __Y) 1440277325Sdim{ 1441314564Sdim return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y); 1442277325Sdim} 1443277325Sdim 1444288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1445314564Sdim_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 1446288943Sdim{ 1447314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1448314564Sdim (__v8di)_mm512_mul_epu32(__X, __Y), 1449314564Sdim (__v8di)__W); 1450288943Sdim} 1451288943Sdim 1452288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1453314564Sdim_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y) 1454288943Sdim{ 1455314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1456314564Sdim (__v8di)_mm512_mul_epu32(__X, __Y), 1457314564Sdim (__v8di)_mm512_setzero_si512 ()); 1458288943Sdim} 1459288943Sdim 1460288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1461288943Sdim_mm512_mullo_epi32 (__m512i __A, __m512i __B) 1462288943Sdim{ 
1463309124Sdim return (__m512i) ((__v16su) __A * (__v16su) __B); 1464288943Sdim} 1465288943Sdim 1466288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1467314564Sdim_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) 1468288943Sdim{ 1469314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1470314564Sdim (__v16si)_mm512_mullo_epi32(__A, __B), 1471314564Sdim (__v16si)_mm512_setzero_si512()); 1472288943Sdim} 1473288943Sdim 1474288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1475314564Sdim_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1476288943Sdim{ 1477314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1478314564Sdim (__v16si)_mm512_mullo_epi32(__A, __B), 1479314564Sdim (__v16si)__W); 1480288943Sdim} 1481288943Sdim 1482309124Sdim#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \ 1483309124Sdim (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ 1484309124Sdim (__v8df)(__m512d)(W), (__mmask8)(U), \ 1485309124Sdim (int)(R)); }) 1486309124Sdim 1487309124Sdim#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \ 1488309124Sdim (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ 1489309124Sdim (__v8df)_mm512_setzero_pd(), \ 1490309124Sdim (__mmask8)(U), (int)(R)); }) 1491309124Sdim 1492309124Sdim#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \ 1493309124Sdim (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ 1494309124Sdim (__v8df)_mm512_undefined_pd(), \ 1495309124Sdim (__mmask8)-1, (int)(R)); }) 1496309124Sdim 1497288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1498296417Sdim_mm512_sqrt_pd(__m512d __a) 1499277325Sdim{ 1500296417Sdim return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a, 1501277325Sdim (__v8df) _mm512_setzero_pd (), 1502277325Sdim (__mmask8) -1, 1503277325Sdim _MM_FROUND_CUR_DIRECTION); 1504277325Sdim} 1505277325Sdim 1506309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 
1507309124Sdim_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) 1508309124Sdim{ 1509309124Sdim return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 1510309124Sdim (__v8df) __W, 1511309124Sdim (__mmask8) __U, 1512309124Sdim _MM_FROUND_CUR_DIRECTION); 1513309124Sdim} 1514309124Sdim 1515309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1516309124Sdim_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) 1517309124Sdim{ 1518309124Sdim return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 1519309124Sdim (__v8df) 1520309124Sdim _mm512_setzero_pd (), 1521309124Sdim (__mmask8) __U, 1522309124Sdim _MM_FROUND_CUR_DIRECTION); 1523309124Sdim} 1524309124Sdim 1525309124Sdim#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \ 1526309124Sdim (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ 1527309124Sdim (__v16sf)(__m512)(W), (__mmask16)(U), \ 1528309124Sdim (int)(R)); }) 1529309124Sdim 1530309124Sdim#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \ 1531309124Sdim (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ 1532309124Sdim (__v16sf)_mm512_setzero_ps(), \ 1533309124Sdim (__mmask16)(U), (int)(R)); }) 1534309124Sdim 1535309124Sdim#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \ 1536309124Sdim (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ 1537309124Sdim (__v16sf)_mm512_undefined_ps(), \ 1538309124Sdim (__mmask16)-1, (int)(R)); }) 1539309124Sdim 1540288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1541296417Sdim_mm512_sqrt_ps(__m512 __a) 1542277325Sdim{ 1543296417Sdim return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a, 1544277325Sdim (__v16sf) _mm512_setzero_ps (), 1545277325Sdim (__mmask16) -1, 1546277325Sdim _MM_FROUND_CUR_DIRECTION); 1547277325Sdim} 1548277325Sdim 1549309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1550309124Sdim_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) 1551309124Sdim{ 1552309124Sdim return 
(__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A, 1553309124Sdim (__v16sf) __W, 1554309124Sdim (__mmask16) __U, 1555309124Sdim _MM_FROUND_CUR_DIRECTION); 1556309124Sdim} 1557309124Sdim 1558309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1559309124Sdim_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A) 1560309124Sdim{ 1561309124Sdim return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A, 1562309124Sdim (__v16sf) _mm512_setzero_ps (), 1563309124Sdim (__mmask16) __U, 1564309124Sdim _MM_FROUND_CUR_DIRECTION); 1565309124Sdim} 1566309124Sdim 1567288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1568277325Sdim_mm512_rsqrt14_pd(__m512d __A) 1569277325Sdim{ 1570277325Sdim return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1571277325Sdim (__v8df) 1572277325Sdim _mm512_setzero_pd (), 1573277325Sdim (__mmask8) -1);} 1574277325Sdim 1575309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1576309124Sdim_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A) 1577309124Sdim{ 1578309124Sdim return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1579309124Sdim (__v8df) __W, 1580309124Sdim (__mmask8) __U); 1581309124Sdim} 1582309124Sdim 1583309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1584309124Sdim_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A) 1585309124Sdim{ 1586309124Sdim return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1587309124Sdim (__v8df) 1588309124Sdim _mm512_setzero_pd (), 1589309124Sdim (__mmask8) __U); 1590309124Sdim} 1591309124Sdim 1592288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1593277325Sdim_mm512_rsqrt14_ps(__m512 __A) 1594277325Sdim{ 1595277325Sdim return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1596277325Sdim (__v16sf) 1597277325Sdim _mm512_setzero_ps (), 1598277325Sdim (__mmask16) -1); 1599277325Sdim} 1600277325Sdim 1601309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1602309124Sdim_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A) 
1603309124Sdim{ 1604309124Sdim return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1605309124Sdim (__v16sf) __W, 1606309124Sdim (__mmask16) __U); 1607309124Sdim} 1608309124Sdim 1609309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1610309124Sdim_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A) 1611309124Sdim{ 1612309124Sdim return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1613309124Sdim (__v16sf) 1614309124Sdim _mm512_setzero_ps (), 1615309124Sdim (__mmask16) __U); 1616309124Sdim} 1617309124Sdim 1618288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1619277325Sdim_mm_rsqrt14_ss(__m128 __A, __m128 __B) 1620277325Sdim{ 1621309124Sdim return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, 1622277325Sdim (__v4sf) __B, 1623277325Sdim (__v4sf) 1624277325Sdim _mm_setzero_ps (), 1625277325Sdim (__mmask8) -1); 1626277325Sdim} 1627277325Sdim 1628309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1629309124Sdim_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1630309124Sdim{ 1631309124Sdim return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, 1632309124Sdim (__v4sf) __B, 1633309124Sdim (__v4sf) __W, 1634309124Sdim (__mmask8) __U); 1635309124Sdim} 1636309124Sdim 1637309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1638309124Sdim_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) 1639309124Sdim{ 1640309124Sdim return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, 1641309124Sdim (__v4sf) __B, 1642309124Sdim (__v4sf) _mm_setzero_ps (), 1643309124Sdim (__mmask8) __U); 1644309124Sdim} 1645309124Sdim 1646288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1647277325Sdim_mm_rsqrt14_sd(__m128d __A, __m128d __B) 1648277325Sdim{ 1649309124Sdim return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A, 1650277325Sdim (__v2df) __B, 1651277325Sdim (__v2df) 1652277325Sdim _mm_setzero_pd (), 1653277325Sdim (__mmask8) -1); 1654277325Sdim} 1655277325Sdim 1656309124Sdimstatic __inline__ __m128d 
__DEFAULT_FN_ATTRS 1657309124Sdim_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1658309124Sdim{ 1659309124Sdim return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, 1660309124Sdim (__v2df) __B, 1661309124Sdim (__v2df) __W, 1662309124Sdim (__mmask8) __U); 1663309124Sdim} 1664309124Sdim 1665309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1666309124Sdim_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) 1667309124Sdim{ 1668309124Sdim return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, 1669309124Sdim (__v2df) __B, 1670309124Sdim (__v2df) _mm_setzero_pd (), 1671309124Sdim (__mmask8) __U); 1672309124Sdim} 1673309124Sdim 1674288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1675277325Sdim_mm512_rcp14_pd(__m512d __A) 1676277325Sdim{ 1677277325Sdim return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1678277325Sdim (__v8df) 1679277325Sdim _mm512_setzero_pd (), 1680277325Sdim (__mmask8) -1); 1681277325Sdim} 1682277325Sdim 1683309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1684309124Sdim_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A) 1685309124Sdim{ 1686309124Sdim return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1687309124Sdim (__v8df) __W, 1688309124Sdim (__mmask8) __U); 1689309124Sdim} 1690309124Sdim 1691309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1692309124Sdim_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A) 1693309124Sdim{ 1694309124Sdim return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1695309124Sdim (__v8df) 1696309124Sdim _mm512_setzero_pd (), 1697309124Sdim (__mmask8) __U); 1698309124Sdim} 1699309124Sdim 1700288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1701277325Sdim_mm512_rcp14_ps(__m512 __A) 1702277325Sdim{ 1703277325Sdim return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1704277325Sdim (__v16sf) 1705277325Sdim _mm512_setzero_ps (), 1706277325Sdim (__mmask16) -1); 1707277325Sdim} 1708309124Sdim 
1709309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1710309124Sdim_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A) 1711309124Sdim{ 1712309124Sdim return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1713309124Sdim (__v16sf) __W, 1714309124Sdim (__mmask16) __U); 1715309124Sdim} 1716309124Sdim 1717309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1718309124Sdim_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A) 1719309124Sdim{ 1720309124Sdim return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1721309124Sdim (__v16sf) 1722309124Sdim _mm512_setzero_ps (), 1723309124Sdim (__mmask16) __U); 1724309124Sdim} 1725309124Sdim 1726288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1727277325Sdim_mm_rcp14_ss(__m128 __A, __m128 __B) 1728277325Sdim{ 1729309124Sdim return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, 1730277325Sdim (__v4sf) __B, 1731277325Sdim (__v4sf) 1732277325Sdim _mm_setzero_ps (), 1733277325Sdim (__mmask8) -1); 1734277325Sdim} 1735277325Sdim 1736309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1737309124Sdim_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1738309124Sdim{ 1739309124Sdim return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, 1740309124Sdim (__v4sf) __B, 1741309124Sdim (__v4sf) __W, 1742309124Sdim (__mmask8) __U); 1743309124Sdim} 1744309124Sdim 1745309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1746309124Sdim_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B) 1747309124Sdim{ 1748309124Sdim return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, 1749309124Sdim (__v4sf) __B, 1750309124Sdim (__v4sf) _mm_setzero_ps (), 1751309124Sdim (__mmask8) __U); 1752309124Sdim} 1753309124Sdim 1754288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1755277325Sdim_mm_rcp14_sd(__m128d __A, __m128d __B) 1756277325Sdim{ 1757309124Sdim return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A, 1758277325Sdim (__v2df) __B, 1759277325Sdim (__v2df) 1760277325Sdim 
_mm_setzero_pd (), 1761277325Sdim (__mmask8) -1); 1762277325Sdim} 1763277325Sdim 1764309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1765309124Sdim_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1766309124Sdim{ 1767309124Sdim return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, 1768309124Sdim (__v2df) __B, 1769309124Sdim (__v2df) __W, 1770309124Sdim (__mmask8) __U); 1771309124Sdim} 1772309124Sdim 1773309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1774309124Sdim_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B) 1775309124Sdim{ 1776309124Sdim return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, 1777309124Sdim (__v2df) __B, 1778309124Sdim (__v2df) _mm_setzero_pd (), 1779309124Sdim (__mmask8) __U); 1780309124Sdim} 1781309124Sdim 1782288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 1783277325Sdim_mm512_floor_ps(__m512 __A) 1784277325Sdim{ 1785277325Sdim return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 1786277325Sdim _MM_FROUND_FLOOR, 1787277325Sdim (__v16sf) __A, -1, 1788277325Sdim _MM_FROUND_CUR_DIRECTION); 1789277325Sdim} 1790277325Sdim 1791309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1792309124Sdim_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A) 1793309124Sdim{ 1794309124Sdim return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 1795309124Sdim _MM_FROUND_FLOOR, 1796309124Sdim (__v16sf) __W, __U, 1797309124Sdim _MM_FROUND_CUR_DIRECTION); 1798309124Sdim} 1799309124Sdim 1800288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 1801277325Sdim_mm512_floor_pd(__m512d __A) 1802277325Sdim{ 1803277325Sdim return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 1804277325Sdim _MM_FROUND_FLOOR, 1805277325Sdim (__v8df) __A, -1, 1806277325Sdim _MM_FROUND_CUR_DIRECTION); 1807277325Sdim} 1808277325Sdim 1809309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1810309124Sdim_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A) 1811309124Sdim{ 1812309124Sdim 
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 1813309124Sdim _MM_FROUND_FLOOR, 1814309124Sdim (__v8df) __W, __U, 1815309124Sdim _MM_FROUND_CUR_DIRECTION); 1816309124Sdim} 1817309124Sdim 1818309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1819309124Sdim_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A) 1820309124Sdim{ 1821309124Sdim return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 1822309124Sdim _MM_FROUND_CEIL, 1823309124Sdim (__v16sf) __W, __U, 1824309124Sdim _MM_FROUND_CUR_DIRECTION); 1825309124Sdim} 1826309124Sdim 1827288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 1828277325Sdim_mm512_ceil_ps(__m512 __A) 1829277325Sdim{ 1830277325Sdim return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 1831277325Sdim _MM_FROUND_CEIL, 1832277325Sdim (__v16sf) __A, -1, 1833277325Sdim _MM_FROUND_CUR_DIRECTION); 1834277325Sdim} 1835277325Sdim 1836288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 1837277325Sdim_mm512_ceil_pd(__m512d __A) 1838277325Sdim{ 1839277325Sdim return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 1840277325Sdim _MM_FROUND_CEIL, 1841277325Sdim (__v8df) __A, -1, 1842277325Sdim _MM_FROUND_CUR_DIRECTION); 1843277325Sdim} 1844277325Sdim 1845309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1846309124Sdim_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) 1847309124Sdim{ 1848309124Sdim return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 1849309124Sdim _MM_FROUND_CEIL, 1850309124Sdim (__v8df) __W, __U, 1851309124Sdim _MM_FROUND_CUR_DIRECTION); 1852309124Sdim} 1853309124Sdim 1854288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1855277325Sdim_mm512_abs_epi64(__m512i __A) 1856277325Sdim{ 1857277325Sdim return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 1858277325Sdim (__v8di) 1859277325Sdim _mm512_setzero_si512 (), 1860277325Sdim (__mmask8) -1); 1861277325Sdim} 1862277325Sdim 1863309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 
1864309124Sdim_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 1865309124Sdim{ 1866309124Sdim return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 1867309124Sdim (__v8di) __W, 1868309124Sdim (__mmask8) __U); 1869309124Sdim} 1870309124Sdim 1871309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1872309124Sdim_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) 1873309124Sdim{ 1874309124Sdim return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 1875309124Sdim (__v8di) 1876309124Sdim _mm512_setzero_si512 (), 1877309124Sdim (__mmask8) __U); 1878309124Sdim} 1879309124Sdim 1880288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 1881277325Sdim_mm512_abs_epi32(__m512i __A) 1882277325Sdim{ 1883277325Sdim return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 1884277325Sdim (__v16si) 1885277325Sdim _mm512_setzero_si512 (), 1886277325Sdim (__mmask16) -1); 1887277325Sdim} 1888277325Sdim 1889309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1890309124Sdim_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 1891309124Sdim{ 1892309124Sdim return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 1893309124Sdim (__v16si) __W, 1894309124Sdim (__mmask16) __U); 1895309124Sdim} 1896309124Sdim 1897309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 1898309124Sdim_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) 1899309124Sdim{ 1900309124Sdim return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 1901309124Sdim (__v16si) 1902309124Sdim _mm512_setzero_si512 (), 1903309124Sdim (__mmask16) __U); 1904309124Sdim} 1905309124Sdim 1906296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 1907296417Sdim_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 1908309124Sdim return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A, 1909296417Sdim (__v4sf) __B, 1910296417Sdim (__v4sf) __W, 1911296417Sdim (__mmask8) __U, 1912296417Sdim _MM_FROUND_CUR_DIRECTION); 1913296417Sdim} 1914296417Sdim 1915296417Sdimstatic 
__inline__ __m128 __DEFAULT_FN_ATTRS 1916296417Sdim_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) { 1917309124Sdim return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A, 1918296417Sdim (__v4sf) __B, 1919296417Sdim (__v4sf) _mm_setzero_ps (), 1920296417Sdim (__mmask8) __U, 1921296417Sdim _MM_FROUND_CUR_DIRECTION); 1922296417Sdim} 1923296417Sdim 1924309124Sdim#define _mm_add_round_ss(A, B, R) __extension__ ({ \ 1925309124Sdim (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 1926309124Sdim (__v4sf)(__m128)(B), \ 1927309124Sdim (__v4sf)_mm_setzero_ps(), \ 1928309124Sdim (__mmask8)-1, (int)(R)); }) 1929296417Sdim 1930309124Sdim#define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \ 1931309124Sdim (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 1932309124Sdim (__v4sf)(__m128)(B), \ 1933309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 1934309124Sdim (int)(R)); }) 1935296417Sdim 1936309124Sdim#define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \ 1937309124Sdim (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 1938309124Sdim (__v4sf)(__m128)(B), \ 1939309124Sdim (__v4sf)_mm_setzero_ps(), \ 1940309124Sdim (__mmask8)(U), (int)(R)); }) 1941296417Sdim 1942296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1943296417Sdim_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 1944309124Sdim return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A, 1945296417Sdim (__v2df) __B, 1946296417Sdim (__v2df) __W, 1947296417Sdim (__mmask8) __U, 1948296417Sdim _MM_FROUND_CUR_DIRECTION); 1949296417Sdim} 1950296417Sdim 1951296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 1952296417Sdim_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) { 1953309124Sdim return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A, 1954296417Sdim (__v2df) __B, 1955296417Sdim (__v2df) _mm_setzero_pd (), 1956296417Sdim (__mmask8) __U, 1957296417Sdim _MM_FROUND_CUR_DIRECTION); 1958296417Sdim} 
1959309124Sdim#define _mm_add_round_sd(A, B, R) __extension__ ({ \ 1960309124Sdim (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 1961309124Sdim (__v2df)(__m128d)(B), \ 1962309124Sdim (__v2df)_mm_setzero_pd(), \ 1963309124Sdim (__mmask8)-1, (int)(R)); }) 1964296417Sdim 1965309124Sdim#define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \ 1966309124Sdim (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 1967309124Sdim (__v2df)(__m128d)(B), \ 1968309124Sdim (__v2df)(__m128d)(W), \ 1969309124Sdim (__mmask8)(U), (int)(R)); }) 1970296417Sdim 1971309124Sdim#define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \ 1972309124Sdim (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 1973309124Sdim (__v2df)(__m128d)(B), \ 1974309124Sdim (__v2df)_mm_setzero_pd(), \ 1975309124Sdim (__mmask8)(U), (int)(R)); }) 1976296417Sdim 1977296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1978296417Sdim_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 1979314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 1980314564Sdim (__v8df)_mm512_add_pd(__A, __B), 1981314564Sdim (__v8df)__W); 1982296417Sdim} 1983296417Sdim 1984296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 1985296417Sdim_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) { 1986314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 1987314564Sdim (__v8df)_mm512_add_pd(__A, __B), 1988314564Sdim (__v8df)_mm512_setzero_pd()); 1989296417Sdim} 1990296417Sdim 1991296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1992296417Sdim_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 1993314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 1994314564Sdim (__v16sf)_mm512_add_ps(__A, __B), 1995314564Sdim (__v16sf)__W); 1996296417Sdim} 1997296417Sdim 1998296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 1999296417Sdim_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) 
{ 2000314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2001314564Sdim (__v16sf)_mm512_add_ps(__A, __B), 2002314564Sdim (__v16sf)_mm512_setzero_ps()); 2003296417Sdim} 2004296417Sdim 2005309124Sdim#define _mm512_add_round_pd(A, B, R) __extension__ ({ \ 2006309124Sdim (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \ 2007309124Sdim (__v8df)(__m512d)(B), \ 2008309124Sdim (__v8df)_mm512_setzero_pd(), \ 2009309124Sdim (__mmask8)-1, (int)(R)); }) 2010296417Sdim 2011309124Sdim#define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \ 2012309124Sdim (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \ 2013309124Sdim (__v8df)(__m512d)(B), \ 2014309124Sdim (__v8df)(__m512d)(W), (__mmask8)(U), \ 2015309124Sdim (int)(R)); }) 2016296417Sdim 2017309124Sdim#define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \ 2018309124Sdim (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \ 2019309124Sdim (__v8df)(__m512d)(B), \ 2020309124Sdim (__v8df)_mm512_setzero_pd(), \ 2021309124Sdim (__mmask8)(U), (int)(R)); }) 2022296417Sdim 2023309124Sdim#define _mm512_add_round_ps(A, B, R) __extension__ ({ \ 2024309124Sdim (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \ 2025309124Sdim (__v16sf)(__m512)(B), \ 2026309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2027309124Sdim (__mmask16)-1, (int)(R)); }) 2028296417Sdim 2029309124Sdim#define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \ 2030309124Sdim (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \ 2031309124Sdim (__v16sf)(__m512)(B), \ 2032309124Sdim (__v16sf)(__m512)(W), (__mmask16)(U), \ 2033309124Sdim (int)(R)); }) 2034296417Sdim 2035309124Sdim#define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \ 2036309124Sdim (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \ 2037309124Sdim (__v16sf)(__m512)(B), \ 2038309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2039309124Sdim (__mmask16)(U), (int)(R)); }) 2040296417Sdim 2041296417Sdimstatic 
__inline__ __m128 __DEFAULT_FN_ATTRS 2042296417Sdim_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 2043309124Sdim return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A, 2044296417Sdim (__v4sf) __B, 2045296417Sdim (__v4sf) __W, 2046296417Sdim (__mmask8) __U, 2047296417Sdim _MM_FROUND_CUR_DIRECTION); 2048296417Sdim} 2049296417Sdim 2050296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 2051296417Sdim_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) { 2052309124Sdim return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A, 2053296417Sdim (__v4sf) __B, 2054296417Sdim (__v4sf) _mm_setzero_ps (), 2055296417Sdim (__mmask8) __U, 2056296417Sdim _MM_FROUND_CUR_DIRECTION); 2057296417Sdim} 2058309124Sdim#define _mm_sub_round_ss(A, B, R) __extension__ ({ \ 2059309124Sdim (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2060309124Sdim (__v4sf)(__m128)(B), \ 2061309124Sdim (__v4sf)_mm_setzero_ps(), \ 2062309124Sdim (__mmask8)-1, (int)(R)); }) 2063296417Sdim 2064309124Sdim#define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \ 2065309124Sdim (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2066309124Sdim (__v4sf)(__m128)(B), \ 2067309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 2068309124Sdim (int)(R)); }) 2069296417Sdim 2070309124Sdim#define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \ 2071309124Sdim (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2072309124Sdim (__v4sf)(__m128)(B), \ 2073309124Sdim (__v4sf)_mm_setzero_ps(), \ 2074309124Sdim (__mmask8)(U), (int)(R)); }) 2075296417Sdim 2076296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 2077296417Sdim_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 2078309124Sdim return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A, 2079296417Sdim (__v2df) __B, 2080296417Sdim (__v2df) __W, 2081296417Sdim (__mmask8) __U, 2082296417Sdim _MM_FROUND_CUR_DIRECTION); 2083296417Sdim} 2084296417Sdim 2085296417Sdimstatic 
__inline__ __m128d __DEFAULT_FN_ATTRS 2086296417Sdim_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) { 2087309124Sdim return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A, 2088296417Sdim (__v2df) __B, 2089296417Sdim (__v2df) _mm_setzero_pd (), 2090296417Sdim (__mmask8) __U, 2091296417Sdim _MM_FROUND_CUR_DIRECTION); 2092296417Sdim} 2093296417Sdim 2094309124Sdim#define _mm_sub_round_sd(A, B, R) __extension__ ({ \ 2095309124Sdim (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2096309124Sdim (__v2df)(__m128d)(B), \ 2097309124Sdim (__v2df)_mm_setzero_pd(), \ 2098309124Sdim (__mmask8)-1, (int)(R)); }) 2099296417Sdim 2100309124Sdim#define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \ 2101309124Sdim (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2102309124Sdim (__v2df)(__m128d)(B), \ 2103309124Sdim (__v2df)(__m128d)(W), \ 2104309124Sdim (__mmask8)(U), (int)(R)); }) 2105296417Sdim 2106309124Sdim#define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \ 2107309124Sdim (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2108309124Sdim (__v2df)(__m128d)(B), \ 2109309124Sdim (__v2df)_mm_setzero_pd(), \ 2110309124Sdim (__mmask8)(U), (int)(R)); }) 2111296417Sdim 2112296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2113296417Sdim_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2114314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2115314564Sdim (__v8df)_mm512_sub_pd(__A, __B), 2116314564Sdim (__v8df)__W); 2117296417Sdim} 2118296417Sdim 2119296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2120296417Sdim_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2121314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2122314564Sdim (__v8df)_mm512_sub_pd(__A, __B), 2123314564Sdim (__v8df)_mm512_setzero_pd()); 2124296417Sdim} 2125296417Sdim 2126296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 
2127296417Sdim_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2128314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2129314564Sdim (__v16sf)_mm512_sub_ps(__A, __B), 2130314564Sdim (__v16sf)__W); 2131296417Sdim} 2132296417Sdim 2133296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2134296417Sdim_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2135314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2136314564Sdim (__v16sf)_mm512_sub_ps(__A, __B), 2137314564Sdim (__v16sf)_mm512_setzero_ps()); 2138296417Sdim} 2139296417Sdim 2140309124Sdim#define _mm512_sub_round_pd(A, B, R) __extension__ ({ \ 2141309124Sdim (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ 2142309124Sdim (__v8df)(__m512d)(B), \ 2143309124Sdim (__v8df)_mm512_setzero_pd(), \ 2144309124Sdim (__mmask8)-1, (int)(R)); }) 2145296417Sdim 2146309124Sdim#define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \ 2147309124Sdim (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ 2148309124Sdim (__v8df)(__m512d)(B), \ 2149309124Sdim (__v8df)(__m512d)(W), (__mmask8)(U), \ 2150309124Sdim (int)(R)); }) 2151296417Sdim 2152309124Sdim#define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \ 2153309124Sdim (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ 2154309124Sdim (__v8df)(__m512d)(B), \ 2155309124Sdim (__v8df)_mm512_setzero_pd(), \ 2156309124Sdim (__mmask8)(U), (int)(R)); }) 2157296417Sdim 2158309124Sdim#define _mm512_sub_round_ps(A, B, R) __extension__ ({ \ 2159309124Sdim (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ 2160309124Sdim (__v16sf)(__m512)(B), \ 2161309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2162309124Sdim (__mmask16)-1, (int)(R)); }) 2163296417Sdim 2164309124Sdim#define _mm512_mask_sub_round_ps(W, U, A, B, R) __extension__ ({ \ 2165309124Sdim (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ 2166309124Sdim (__v16sf)(__m512)(B), \ 2167309124Sdim 
(__v16sf)(__m512)(W), (__mmask16)(U), \ 2168309124Sdim (int)(R)); }); 2169296417Sdim 2170309124Sdim#define _mm512_maskz_sub_round_ps(U, A, B, R) __extension__ ({ \ 2171309124Sdim (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ 2172309124Sdim (__v16sf)(__m512)(B), \ 2173309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2174309124Sdim (__mmask16)(U), (int)(R)); }); 2175296417Sdim 2176296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 2177296417Sdim_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 2178309124Sdim return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A, 2179296417Sdim (__v4sf) __B, 2180296417Sdim (__v4sf) __W, 2181296417Sdim (__mmask8) __U, 2182296417Sdim _MM_FROUND_CUR_DIRECTION); 2183296417Sdim} 2184296417Sdim 2185296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 2186296417Sdim_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) { 2187309124Sdim return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A, 2188296417Sdim (__v4sf) __B, 2189296417Sdim (__v4sf) _mm_setzero_ps (), 2190296417Sdim (__mmask8) __U, 2191296417Sdim _MM_FROUND_CUR_DIRECTION); 2192296417Sdim} 2193309124Sdim#define _mm_mul_round_ss(A, B, R) __extension__ ({ \ 2194309124Sdim (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2195309124Sdim (__v4sf)(__m128)(B), \ 2196309124Sdim (__v4sf)_mm_setzero_ps(), \ 2197309124Sdim (__mmask8)-1, (int)(R)); }) 2198296417Sdim 2199309124Sdim#define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \ 2200309124Sdim (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2201309124Sdim (__v4sf)(__m128)(B), \ 2202309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 2203309124Sdim (int)(R)); }) 2204296417Sdim 2205309124Sdim#define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \ 2206309124Sdim (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2207309124Sdim (__v4sf)(__m128)(B), \ 2208309124Sdim (__v4sf)_mm_setzero_ps(), \ 2209309124Sdim (__mmask8)(U), (int)(R)); }) 
2210296417Sdim 2211296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 2212296417Sdim_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 2213309124Sdim return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A, 2214296417Sdim (__v2df) __B, 2215296417Sdim (__v2df) __W, 2216296417Sdim (__mmask8) __U, 2217296417Sdim _MM_FROUND_CUR_DIRECTION); 2218296417Sdim} 2219296417Sdim 2220296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 2221296417Sdim_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) { 2222309124Sdim return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A, 2223296417Sdim (__v2df) __B, 2224296417Sdim (__v2df) _mm_setzero_pd (), 2225296417Sdim (__mmask8) __U, 2226296417Sdim _MM_FROUND_CUR_DIRECTION); 2227296417Sdim} 2228296417Sdim 2229309124Sdim#define _mm_mul_round_sd(A, B, R) __extension__ ({ \ 2230309124Sdim (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2231309124Sdim (__v2df)(__m128d)(B), \ 2232309124Sdim (__v2df)_mm_setzero_pd(), \ 2233309124Sdim (__mmask8)-1, (int)(R)); }) 2234296417Sdim 2235309124Sdim#define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \ 2236309124Sdim (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2237309124Sdim (__v2df)(__m128d)(B), \ 2238309124Sdim (__v2df)(__m128d)(W), \ 2239309124Sdim (__mmask8)(U), (int)(R)); }) 2240296417Sdim 2241309124Sdim#define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \ 2242309124Sdim (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2243309124Sdim (__v2df)(__m128d)(B), \ 2244309124Sdim (__v2df)_mm_setzero_pd(), \ 2245309124Sdim (__mmask8)(U), (int)(R)); }) 2246296417Sdim 2247296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2248296417Sdim_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2249314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2250314564Sdim (__v8df)_mm512_mul_pd(__A, __B), 2251314564Sdim (__v8df)__W); 2252296417Sdim} 2253296417Sdim 
2254296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2255296417Sdim_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2256314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2257314564Sdim (__v8df)_mm512_mul_pd(__A, __B), 2258314564Sdim (__v8df)_mm512_setzero_pd()); 2259296417Sdim} 2260296417Sdim 2261296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2262296417Sdim_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2263314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2264314564Sdim (__v16sf)_mm512_mul_ps(__A, __B), 2265314564Sdim (__v16sf)__W); 2266296417Sdim} 2267296417Sdim 2268296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2269296417Sdim_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2270314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2271314564Sdim (__v16sf)_mm512_mul_ps(__A, __B), 2272314564Sdim (__v16sf)_mm512_setzero_ps()); 2273296417Sdim} 2274296417Sdim 2275309124Sdim#define _mm512_mul_round_pd(A, B, R) __extension__ ({ \ 2276309124Sdim (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ 2277309124Sdim (__v8df)(__m512d)(B), \ 2278309124Sdim (__v8df)_mm512_setzero_pd(), \ 2279309124Sdim (__mmask8)-1, (int)(R)); }) 2280296417Sdim 2281309124Sdim#define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \ 2282309124Sdim (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ 2283309124Sdim (__v8df)(__m512d)(B), \ 2284309124Sdim (__v8df)(__m512d)(W), (__mmask8)(U), \ 2285309124Sdim (int)(R)); }) 2286296417Sdim 2287309124Sdim#define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \ 2288309124Sdim (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ 2289309124Sdim (__v8df)(__m512d)(B), \ 2290309124Sdim (__v8df)_mm512_setzero_pd(), \ 2291309124Sdim (__mmask8)(U), (int)(R)); }) 2292296417Sdim 2293309124Sdim#define _mm512_mul_round_ps(A, B, R) __extension__ ({ \ 2294309124Sdim 
(__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ 2295309124Sdim (__v16sf)(__m512)(B), \ 2296309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2297309124Sdim (__mmask16)-1, (int)(R)); }) 2298296417Sdim 2299309124Sdim#define _mm512_mask_mul_round_ps(W, U, A, B, R) __extension__ ({ \ 2300309124Sdim (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ 2301309124Sdim (__v16sf)(__m512)(B), \ 2302309124Sdim (__v16sf)(__m512)(W), (__mmask16)(U), \ 2303309124Sdim (int)(R)); }); 2304296417Sdim 2305309124Sdim#define _mm512_maskz_mul_round_ps(U, A, B, R) __extension__ ({ \ 2306309124Sdim (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ 2307309124Sdim (__v16sf)(__m512)(B), \ 2308309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2309309124Sdim (__mmask16)(U), (int)(R)); }); 2310296417Sdim 2311296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 2312296417Sdim_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 2313309124Sdim return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A, 2314296417Sdim (__v4sf) __B, 2315296417Sdim (__v4sf) __W, 2316296417Sdim (__mmask8) __U, 2317296417Sdim _MM_FROUND_CUR_DIRECTION); 2318296417Sdim} 2319296417Sdim 2320296417Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 2321296417Sdim_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { 2322309124Sdim return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A, 2323296417Sdim (__v4sf) __B, 2324296417Sdim (__v4sf) _mm_setzero_ps (), 2325296417Sdim (__mmask8) __U, 2326296417Sdim _MM_FROUND_CUR_DIRECTION); 2327296417Sdim} 2328296417Sdim 2329309124Sdim#define _mm_div_round_ss(A, B, R) __extension__ ({ \ 2330309124Sdim (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2331309124Sdim (__v4sf)(__m128)(B), \ 2332309124Sdim (__v4sf)_mm_setzero_ps(), \ 2333309124Sdim (__mmask8)-1, (int)(R)); }) 2334296417Sdim 2335309124Sdim#define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \ 2336309124Sdim 
(__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2337309124Sdim (__v4sf)(__m128)(B), \ 2338309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 2339309124Sdim (int)(R)); }) 2340296417Sdim 2341309124Sdim#define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \ 2342309124Sdim (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2343309124Sdim (__v4sf)(__m128)(B), \ 2344309124Sdim (__v4sf)_mm_setzero_ps(), \ 2345309124Sdim (__mmask8)(U), (int)(R)); }) 2346296417Sdim 2347296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 2348296417Sdim_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 2349309124Sdim return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A, 2350296417Sdim (__v2df) __B, 2351296417Sdim (__v2df) __W, 2352296417Sdim (__mmask8) __U, 2353296417Sdim _MM_FROUND_CUR_DIRECTION); 2354296417Sdim} 2355296417Sdim 2356296417Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 2357296417Sdim_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { 2358309124Sdim return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A, 2359296417Sdim (__v2df) __B, 2360296417Sdim (__v2df) _mm_setzero_pd (), 2361296417Sdim (__mmask8) __U, 2362296417Sdim _MM_FROUND_CUR_DIRECTION); 2363296417Sdim} 2364296417Sdim 2365309124Sdim#define _mm_div_round_sd(A, B, R) __extension__ ({ \ 2366309124Sdim (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2367309124Sdim (__v2df)(__m128d)(B), \ 2368309124Sdim (__v2df)_mm_setzero_pd(), \ 2369309124Sdim (__mmask8)-1, (int)(R)); }) 2370296417Sdim 2371309124Sdim#define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \ 2372309124Sdim (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2373309124Sdim (__v2df)(__m128d)(B), \ 2374309124Sdim (__v2df)(__m128d)(W), \ 2375309124Sdim (__mmask8)(U), (int)(R)); }) 2376296417Sdim 2377309124Sdim#define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \ 2378309124Sdim (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 
2379309124Sdim (__v2df)(__m128d)(B), \ 2380309124Sdim (__v2df)_mm_setzero_pd(), \ 2381309124Sdim (__mmask8)(U), (int)(R)); }) 2382296417Sdim 2383309124Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 2384309124Sdim_mm512_div_pd(__m512d __a, __m512d __b) 2385309124Sdim{ 2386309124Sdim return (__m512d)((__v8df)__a/(__v8df)__b); 2387309124Sdim} 2388309124Sdim 2389296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2390296417Sdim_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2391314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2392314564Sdim (__v8df)_mm512_div_pd(__A, __B), 2393314564Sdim (__v8df)__W); 2394296417Sdim} 2395296417Sdim 2396296417Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2397296417Sdim_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2398314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2399314564Sdim (__v8df)_mm512_div_pd(__A, __B), 2400314564Sdim (__v8df)_mm512_setzero_pd()); 2401296417Sdim} 2402296417Sdim 2403309124Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 2404309124Sdim_mm512_div_ps(__m512 __a, __m512 __b) 2405309124Sdim{ 2406309124Sdim return (__m512)((__v16sf)__a/(__v16sf)__b); 2407309124Sdim} 2408309124Sdim 2409296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2410296417Sdim_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2411314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2412314564Sdim (__v16sf)_mm512_div_ps(__A, __B), 2413314564Sdim (__v16sf)__W); 2414296417Sdim} 2415296417Sdim 2416296417Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2417296417Sdim_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2418314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2419314564Sdim (__v16sf)_mm512_div_ps(__A, __B), 2420314564Sdim (__v16sf)_mm512_setzero_ps()); 2421296417Sdim} 2422296417Sdim 2423309124Sdim#define _mm512_div_round_pd(A, B, R) __extension__ ({ \ 2424309124Sdim 
(__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \ 2425309124Sdim (__v8df)(__m512d)(B), \ 2426309124Sdim (__v8df)_mm512_setzero_pd(), \ 2427309124Sdim (__mmask8)-1, (int)(R)); }) 2428296417Sdim 2429309124Sdim#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \ 2430309124Sdim (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \ 2431309124Sdim (__v8df)(__m512d)(B), \ 2432309124Sdim (__v8df)(__m512d)(W), (__mmask8)(U), \ 2433309124Sdim (int)(R)); }) 2434296417Sdim 2435309124Sdim#define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \ 2436309124Sdim (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \ 2437309124Sdim (__v8df)(__m512d)(B), \ 2438309124Sdim (__v8df)_mm512_setzero_pd(), \ 2439309124Sdim (__mmask8)(U), (int)(R)); }) 2440296417Sdim 2441309124Sdim#define _mm512_div_round_ps(A, B, R) __extension__ ({ \ 2442309124Sdim (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \ 2443309124Sdim (__v16sf)(__m512)(B), \ 2444309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2445309124Sdim (__mmask16)-1, (int)(R)); }) 2446296417Sdim 2447309124Sdim#define _mm512_mask_div_round_ps(W, U, A, B, R) __extension__ ({ \ 2448309124Sdim (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \ 2449309124Sdim (__v16sf)(__m512)(B), \ 2450309124Sdim (__v16sf)(__m512)(W), (__mmask16)(U), \ 2451309124Sdim (int)(R)); }); 2452296417Sdim 2453309124Sdim#define _mm512_maskz_div_round_ps(U, A, B, R) __extension__ ({ \ 2454309124Sdim (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \ 2455309124Sdim (__v16sf)(__m512)(B), \ 2456309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2457309124Sdim (__mmask16)(U), (int)(R)); }); 2458296417Sdim 2459288943Sdim#define _mm512_roundscale_ps(A, B) __extension__ ({ \ 2460309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \ 2461309124Sdim (__v16sf)(__m512)(A), (__mmask16)-1, \ 2462309124Sdim _MM_FROUND_CUR_DIRECTION); }) 2463288943Sdim 2464309124Sdim#define 
_mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({\ 2465309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ 2466309124Sdim (__v16sf)(__m512)(A), (__mmask16)(B), \ 2467309124Sdim _MM_FROUND_CUR_DIRECTION); }) 2468309124Sdim 2469309124Sdim#define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({\ 2470309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ 2471309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2472309124Sdim (__mmask16)(A), \ 2473309124Sdim _MM_FROUND_CUR_DIRECTION); }) 2474309124Sdim 2475309124Sdim#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \ 2476309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ 2477309124Sdim (__v16sf)(__m512)(A), (__mmask16)(B), \ 2478309124Sdim (int)(R)); }) 2479309124Sdim 2480309124Sdim#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \ 2481309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ 2482309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2483309124Sdim (__mmask16)(A), (int)(R)); }) 2484309124Sdim 2485309124Sdim#define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \ 2486309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \ 2487309124Sdim (__v16sf)_mm512_undefined_ps(), \ 2488309124Sdim (__mmask16)-1, (int)(R)); }) 2489309124Sdim 2490288943Sdim#define _mm512_roundscale_pd(A, B) __extension__ ({ \ 2491309124Sdim (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \ 2492309124Sdim (__v8df)(__m512d)(A), (__mmask8)-1, \ 2493309124Sdim _MM_FROUND_CUR_DIRECTION); }) 2494288943Sdim 2495309124Sdim#define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({\ 2496309124Sdim (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ 2497309124Sdim (__v8df)(__m512d)(A), (__mmask8)(B), \ 2498309124Sdim _MM_FROUND_CUR_DIRECTION); }) 2499309124Sdim 2500309124Sdim#define 
_mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({\ 2501309124Sdim (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ 2502309124Sdim (__v8df)_mm512_setzero_pd(), \ 2503309124Sdim (__mmask8)(A), \ 2504309124Sdim _MM_FROUND_CUR_DIRECTION); }) 2505309124Sdim 2506309124Sdim#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \ 2507309124Sdim (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ 2508309124Sdim (__v8df)(__m512d)(A), (__mmask8)(B), \ 2509309124Sdim (int)(R)); }) 2510309124Sdim 2511309124Sdim#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \ 2512309124Sdim (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ 2513309124Sdim (__v8df)_mm512_setzero_pd(), \ 2514309124Sdim (__mmask8)(A), (int)(R)); }) 2515309124Sdim 2516309124Sdim#define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \ 2517309124Sdim (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \ 2518309124Sdim (__v8df)_mm512_undefined_pd(), \ 2519309124Sdim (__mmask8)-1, (int)(R)); }) 2520309124Sdim 2521288943Sdim#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \ 2522309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2523309124Sdim (__v8df)(__m512d)(B), \ 2524309124Sdim (__v8df)(__m512d)(C), (__mmask8)-1, \ 2525309124Sdim (int)(R)); }) 2526288943Sdim 2527288943Sdim 2528288943Sdim#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \ 2529309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2530309124Sdim (__v8df)(__m512d)(B), \ 2531309124Sdim (__v8df)(__m512d)(C), \ 2532309124Sdim (__mmask8)(U), (int)(R)); }) 2533288943Sdim 2534288943Sdim 2535288943Sdim#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \ 2536309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \ 2537309124Sdim (__v8df)(__m512d)(B), \ 2538309124Sdim (__v8df)(__m512d)(C), \ 
2539309124Sdim (__mmask8)(U), (int)(R)); }) 2540288943Sdim 2541288943Sdim 2542288943Sdim#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \ 2543309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ 2544309124Sdim (__v8df)(__m512d)(B), \ 2545309124Sdim (__v8df)(__m512d)(C), \ 2546309124Sdim (__mmask8)(U), (int)(R)); }) 2547288943Sdim 2548288943Sdim 2549288943Sdim#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \ 2550309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2551309124Sdim (__v8df)(__m512d)(B), \ 2552309124Sdim -(__v8df)(__m512d)(C), \ 2553309124Sdim (__mmask8)-1, (int)(R)); }) 2554288943Sdim 2555288943Sdim 2556288943Sdim#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \ 2557309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2558309124Sdim (__v8df)(__m512d)(B), \ 2559309124Sdim -(__v8df)(__m512d)(C), \ 2560309124Sdim (__mmask8)(U), (int)(R)); }) 2561288943Sdim 2562288943Sdim 2563288943Sdim#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \ 2564309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ 2565309124Sdim (__v8df)(__m512d)(B), \ 2566309124Sdim -(__v8df)(__m512d)(C), \ 2567309124Sdim (__mmask8)(U), (int)(R)); }) 2568288943Sdim 2569288943Sdim 2570288943Sdim#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \ 2571309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ 2572309124Sdim (__v8df)(__m512d)(B), \ 2573309124Sdim (__v8df)(__m512d)(C), (__mmask8)-1, \ 2574309124Sdim (int)(R)); }) 2575288943Sdim 2576288943Sdim 2577288943Sdim#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \ 2578309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \ 2579309124Sdim (__v8df)(__m512d)(B), \ 2580309124Sdim (__v8df)(__m512d)(C), \ 2581309124Sdim (__mmask8)(U), (int)(R)); }) 2582288943Sdim 2583288943Sdim 2584288943Sdim#define 
_mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \ 2585309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ 2586309124Sdim (__v8df)(__m512d)(B), \ 2587309124Sdim (__v8df)(__m512d)(C), \ 2588309124Sdim (__mmask8)(U), (int)(R)); }) 2589288943Sdim 2590288943Sdim 2591288943Sdim#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \ 2592309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ 2593309124Sdim (__v8df)(__m512d)(B), \ 2594309124Sdim -(__v8df)(__m512d)(C), \ 2595309124Sdim (__mmask8)-1, (int)(R)); }) 2596288943Sdim 2597288943Sdim 2598288943Sdim#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \ 2599309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ 2600309124Sdim (__v8df)(__m512d)(B), \ 2601309124Sdim -(__v8df)(__m512d)(C), \ 2602309124Sdim (__mmask8)(U), (int)(R)); }) 2603288943Sdim 2604288943Sdim 2605288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2606288943Sdim_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) 2607277325Sdim{ 2608288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2609288943Sdim (__v8df) __B, 2610288943Sdim (__v8df) __C, 2611288943Sdim (__mmask8) -1, 2612288943Sdim _MM_FROUND_CUR_DIRECTION); 2613277325Sdim} 2614288943Sdim 2615288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2616288943Sdim_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 2617277325Sdim{ 2618288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2619288943Sdim (__v8df) __B, 2620288943Sdim (__v8df) __C, 2621288943Sdim (__mmask8) __U, 2622288943Sdim _MM_FROUND_CUR_DIRECTION); 2623277325Sdim} 2624277325Sdim 2625288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2626288943Sdim_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 2627277325Sdim{ 2628288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, 2629288943Sdim (__v8df) __B, 
2630288943Sdim (__v8df) __C, 2631288943Sdim (__mmask8) __U, 2632288943Sdim _MM_FROUND_CUR_DIRECTION); 2633277325Sdim} 2634277325Sdim 2635288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2636288943Sdim_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2637288943Sdim{ 2638288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 2639288943Sdim (__v8df) __B, 2640288943Sdim (__v8df) __C, 2641288943Sdim (__mmask8) __U, 2642288943Sdim _MM_FROUND_CUR_DIRECTION); 2643288943Sdim} 2644288943Sdim 2645288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2646277325Sdim_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) 2647277325Sdim{ 2648288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2649288943Sdim (__v8df) __B, 2650288943Sdim -(__v8df) __C, 2651288943Sdim (__mmask8) -1, 2652288943Sdim _MM_FROUND_CUR_DIRECTION); 2653277325Sdim} 2654277325Sdim 2655288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2656288943Sdim_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 2657288943Sdim{ 2658288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2659288943Sdim (__v8df) __B, 2660288943Sdim -(__v8df) __C, 2661288943Sdim (__mmask8) __U, 2662288943Sdim _MM_FROUND_CUR_DIRECTION); 2663288943Sdim} 2664288943Sdim 2665288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2666288943Sdim_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2667288943Sdim{ 2668288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 2669288943Sdim (__v8df) __B, 2670288943Sdim -(__v8df) __C, 2671288943Sdim (__mmask8) __U, 2672288943Sdim _MM_FROUND_CUR_DIRECTION); 2673288943Sdim} 2674288943Sdim 2675288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2676277325Sdim_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) 2677277325Sdim{ 2678288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 2679288943Sdim (__v8df) __B, 
2680288943Sdim (__v8df) __C, 2681288943Sdim (__mmask8) -1, 2682288943Sdim _MM_FROUND_CUR_DIRECTION); 2683277325Sdim} 2684277325Sdim 2685288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2686288943Sdim_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 2687288943Sdim{ 2688288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, 2689288943Sdim (__v8df) __B, 2690288943Sdim (__v8df) __C, 2691288943Sdim (__mmask8) __U, 2692288943Sdim _MM_FROUND_CUR_DIRECTION); 2693288943Sdim} 2694288943Sdim 2695288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2696288943Sdim_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2697288943Sdim{ 2698288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 2699288943Sdim (__v8df) __B, 2700288943Sdim (__v8df) __C, 2701288943Sdim (__mmask8) __U, 2702288943Sdim _MM_FROUND_CUR_DIRECTION); 2703288943Sdim} 2704288943Sdim 2705288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2706288943Sdim_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) 2707288943Sdim{ 2708288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 2709288943Sdim (__v8df) __B, 2710288943Sdim -(__v8df) __C, 2711288943Sdim (__mmask8) -1, 2712288943Sdim _MM_FROUND_CUR_DIRECTION); 2713288943Sdim} 2714288943Sdim 2715288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2716288943Sdim_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2717288943Sdim{ 2718288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 2719288943Sdim (__v8df) __B, 2720288943Sdim -(__v8df) __C, 2721288943Sdim (__mmask8) __U, 2722288943Sdim _MM_FROUND_CUR_DIRECTION); 2723288943Sdim} 2724288943Sdim 2725288943Sdim#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \ 2726309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2727309124Sdim (__v16sf)(__m512)(B), \ 2728309124Sdim (__v16sf)(__m512)(C), (__mmask16)-1, \ 
2729309124Sdim (int)(R)); }) 2730288943Sdim 2731288943Sdim 2732288943Sdim#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \ 2733309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2734309124Sdim (__v16sf)(__m512)(B), \ 2735309124Sdim (__v16sf)(__m512)(C), \ 2736309124Sdim (__mmask16)(U), (int)(R)); }) 2737288943Sdim 2738288943Sdim 2739288943Sdim#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \ 2740309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \ 2741309124Sdim (__v16sf)(__m512)(B), \ 2742309124Sdim (__v16sf)(__m512)(C), \ 2743309124Sdim (__mmask16)(U), (int)(R)); }) 2744288943Sdim 2745288943Sdim 2746288943Sdim#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \ 2747309124Sdim (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ 2748309124Sdim (__v16sf)(__m512)(B), \ 2749309124Sdim (__v16sf)(__m512)(C), \ 2750309124Sdim (__mmask16)(U), (int)(R)); }) 2751288943Sdim 2752288943Sdim 2753288943Sdim#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \ 2754309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2755309124Sdim (__v16sf)(__m512)(B), \ 2756309124Sdim -(__v16sf)(__m512)(C), \ 2757309124Sdim (__mmask16)-1, (int)(R)); }) 2758288943Sdim 2759288943Sdim 2760288943Sdim#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \ 2761309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2762309124Sdim (__v16sf)(__m512)(B), \ 2763309124Sdim -(__v16sf)(__m512)(C), \ 2764309124Sdim (__mmask16)(U), (int)(R)); }) 2765288943Sdim 2766288943Sdim 2767288943Sdim#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \ 2768309124Sdim (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ 2769309124Sdim (__v16sf)(__m512)(B), \ 2770309124Sdim -(__v16sf)(__m512)(C), \ 2771309124Sdim (__mmask16)(U), (int)(R)); }) 2772288943Sdim 2773288943Sdim 2774288943Sdim#define _mm512_fnmadd_round_ps(A, B, 
C, R) __extension__ ({ \ 2775309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \ 2776309124Sdim (__v16sf)(__m512)(B), \ 2777309124Sdim (__v16sf)(__m512)(C), (__mmask16)-1, \ 2778309124Sdim (int)(R)); }) 2779288943Sdim 2780288943Sdim 2781288943Sdim#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \ 2782309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \ 2783309124Sdim (__v16sf)(__m512)(B), \ 2784309124Sdim (__v16sf)(__m512)(C), \ 2785309124Sdim (__mmask16)(U), (int)(R)); }) 2786288943Sdim 2787288943Sdim 2788288943Sdim#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \ 2789309124Sdim (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ 2790309124Sdim (__v16sf)(__m512)(B), \ 2791309124Sdim (__v16sf)(__m512)(C), \ 2792309124Sdim (__mmask16)(U), (int)(R)); }) 2793288943Sdim 2794288943Sdim 2795288943Sdim#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \ 2796309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \ 2797309124Sdim (__v16sf)(__m512)(B), \ 2798309124Sdim -(__v16sf)(__m512)(C), \ 2799309124Sdim (__mmask16)-1, (int)(R)); }) 2800288943Sdim 2801288943Sdim 2802288943Sdim#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \ 2803309124Sdim (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ 2804309124Sdim (__v16sf)(__m512)(B), \ 2805309124Sdim -(__v16sf)(__m512)(C), \ 2806309124Sdim (__mmask16)(U), (int)(R)); }) 2807288943Sdim 2808288943Sdim 2809288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2810277325Sdim_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) 2811277325Sdim{ 2812288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2813288943Sdim (__v16sf) __B, 2814288943Sdim (__v16sf) __C, 2815288943Sdim (__mmask16) -1, 2816288943Sdim _MM_FROUND_CUR_DIRECTION); 2817277325Sdim} 2818277325Sdim 2819288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 
2820288943Sdim_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 2821288943Sdim{ 2822288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2823288943Sdim (__v16sf) __B, 2824288943Sdim (__v16sf) __C, 2825288943Sdim (__mmask16) __U, 2826288943Sdim _MM_FROUND_CUR_DIRECTION); 2827288943Sdim} 2828288943Sdim 2829288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2830288943Sdim_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 2831288943Sdim{ 2832288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, 2833288943Sdim (__v16sf) __B, 2834288943Sdim (__v16sf) __C, 2835288943Sdim (__mmask16) __U, 2836288943Sdim _MM_FROUND_CUR_DIRECTION); 2837288943Sdim} 2838288943Sdim 2839288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2840288943Sdim_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2841288943Sdim{ 2842288943Sdim return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 2843288943Sdim (__v16sf) __B, 2844288943Sdim (__v16sf) __C, 2845288943Sdim (__mmask16) __U, 2846288943Sdim _MM_FROUND_CUR_DIRECTION); 2847288943Sdim} 2848288943Sdim 2849288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2850277325Sdim_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) 2851277325Sdim{ 2852288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2853288943Sdim (__v16sf) __B, 2854288943Sdim -(__v16sf) __C, 2855288943Sdim (__mmask16) -1, 2856288943Sdim _MM_FROUND_CUR_DIRECTION); 2857277325Sdim} 2858277325Sdim 2859288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2860288943Sdim_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 2861288943Sdim{ 2862288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2863288943Sdim (__v16sf) __B, 2864288943Sdim -(__v16sf) __C, 2865288943Sdim (__mmask16) __U, 2866288943Sdim _MM_FROUND_CUR_DIRECTION); 2867288943Sdim} 2868288943Sdim 2869288943Sdimstatic __inline__ __m512 
__DEFAULT_FN_ATTRS 2870288943Sdim_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2871288943Sdim{ 2872288943Sdim return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 2873288943Sdim (__v16sf) __B, 2874288943Sdim -(__v16sf) __C, 2875288943Sdim (__mmask16) __U, 2876288943Sdim _MM_FROUND_CUR_DIRECTION); 2877288943Sdim} 2878288943Sdim 2879288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2880277325Sdim_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) 2881277325Sdim{ 2882288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 2883288943Sdim (__v16sf) __B, 2884288943Sdim (__v16sf) __C, 2885288943Sdim (__mmask16) -1, 2886288943Sdim _MM_FROUND_CUR_DIRECTION); 2887277325Sdim} 2888277325Sdim 2889288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2890288943Sdim_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 2891288943Sdim{ 2892288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, 2893288943Sdim (__v16sf) __B, 2894288943Sdim (__v16sf) __C, 2895288943Sdim (__mmask16) __U, 2896288943Sdim _MM_FROUND_CUR_DIRECTION); 2897288943Sdim} 2898288943Sdim 2899288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2900288943Sdim_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2901288943Sdim{ 2902288943Sdim return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 2903288943Sdim (__v16sf) __B, 2904288943Sdim (__v16sf) __C, 2905288943Sdim (__mmask16) __U, 2906288943Sdim _MM_FROUND_CUR_DIRECTION); 2907288943Sdim} 2908288943Sdim 2909288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 2910288943Sdim_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) 2911288943Sdim{ 2912288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 2913288943Sdim (__v16sf) __B, 2914288943Sdim -(__v16sf) __C, 2915288943Sdim (__mmask16) -1, 2916288943Sdim _MM_FROUND_CUR_DIRECTION); 2917288943Sdim} 2918288943Sdim 2919288943Sdimstatic __inline__ __m512 
__DEFAULT_FN_ATTRS 2920288943Sdim_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2921288943Sdim{ 2922288943Sdim return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 2923288943Sdim (__v16sf) __B, 2924288943Sdim -(__v16sf) __C, 2925288943Sdim (__mmask16) __U, 2926288943Sdim _MM_FROUND_CUR_DIRECTION); 2927288943Sdim} 2928288943Sdim 2929288943Sdim#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \ 2930309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 2931309124Sdim (__v8df)(__m512d)(B), \ 2932309124Sdim (__v8df)(__m512d)(C), \ 2933309124Sdim (__mmask8)-1, (int)(R)); }) 2934288943Sdim 2935288943Sdim 2936288943Sdim#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \ 2937309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 2938309124Sdim (__v8df)(__m512d)(B), \ 2939309124Sdim (__v8df)(__m512d)(C), \ 2940309124Sdim (__mmask8)(U), (int)(R)); }) 2941288943Sdim 2942288943Sdim 2943288943Sdim#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \ 2944309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \ 2945309124Sdim (__v8df)(__m512d)(B), \ 2946309124Sdim (__v8df)(__m512d)(C), \ 2947309124Sdim (__mmask8)(U), (int)(R)); }) 2948288943Sdim 2949288943Sdim 2950288943Sdim#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \ 2951309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ 2952309124Sdim (__v8df)(__m512d)(B), \ 2953309124Sdim (__v8df)(__m512d)(C), \ 2954309124Sdim (__mmask8)(U), (int)(R)); }) 2955288943Sdim 2956288943Sdim 2957288943Sdim#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \ 2958309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 2959309124Sdim (__v8df)(__m512d)(B), \ 2960309124Sdim -(__v8df)(__m512d)(C), \ 2961309124Sdim (__mmask8)-1, (int)(R)); }) 2962288943Sdim 2963288943Sdim 2964288943Sdim#define 
_mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \ 2965309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 2966309124Sdim (__v8df)(__m512d)(B), \ 2967309124Sdim -(__v8df)(__m512d)(C), \ 2968309124Sdim (__mmask8)(U), (int)(R)); }) 2969288943Sdim 2970288943Sdim 2971288943Sdim#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \ 2972309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ 2973309124Sdim (__v8df)(__m512d)(B), \ 2974309124Sdim -(__v8df)(__m512d)(C), \ 2975309124Sdim (__mmask8)(U), (int)(R)); }) 2976288943Sdim 2977288943Sdim 2978288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2979288943Sdim_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) 2980288943Sdim{ 2981288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 2982288943Sdim (__v8df) __B, 2983288943Sdim (__v8df) __C, 2984288943Sdim (__mmask8) -1, 2985288943Sdim _MM_FROUND_CUR_DIRECTION); 2986288943Sdim} 2987288943Sdim 2988288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2989288943Sdim_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 2990288943Sdim{ 2991288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 2992288943Sdim (__v8df) __B, 2993288943Sdim (__v8df) __C, 2994288943Sdim (__mmask8) __U, 2995288943Sdim _MM_FROUND_CUR_DIRECTION); 2996288943Sdim} 2997288943Sdim 2998288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 2999288943Sdim_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 3000288943Sdim{ 3001288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, 3002288943Sdim (__v8df) __B, 3003288943Sdim (__v8df) __C, 3004288943Sdim (__mmask8) __U, 3005288943Sdim _MM_FROUND_CUR_DIRECTION); 3006288943Sdim} 3007288943Sdim 3008288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3009288943Sdim_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 
3010288943Sdim{ 3011288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 3012288943Sdim (__v8df) __B, 3013288943Sdim (__v8df) __C, 3014288943Sdim (__mmask8) __U, 3015288943Sdim _MM_FROUND_CUR_DIRECTION); 3016288943Sdim} 3017288943Sdim 3018288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3019288943Sdim_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) 3020288943Sdim{ 3021288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3022288943Sdim (__v8df) __B, 3023288943Sdim -(__v8df) __C, 3024288943Sdim (__mmask8) -1, 3025288943Sdim _MM_FROUND_CUR_DIRECTION); 3026288943Sdim} 3027288943Sdim 3028288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3029288943Sdim_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 3030288943Sdim{ 3031288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3032288943Sdim (__v8df) __B, 3033288943Sdim -(__v8df) __C, 3034288943Sdim (__mmask8) __U, 3035288943Sdim _MM_FROUND_CUR_DIRECTION); 3036288943Sdim} 3037288943Sdim 3038288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3039288943Sdim_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 3040288943Sdim{ 3041288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 3042288943Sdim (__v8df) __B, 3043288943Sdim -(__v8df) __C, 3044288943Sdim (__mmask8) __U, 3045288943Sdim _MM_FROUND_CUR_DIRECTION); 3046288943Sdim} 3047288943Sdim 3048288943Sdim#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \ 3049309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 3050309124Sdim (__v16sf)(__m512)(B), \ 3051309124Sdim (__v16sf)(__m512)(C), \ 3052309124Sdim (__mmask16)-1, (int)(R)); }) 3053288943Sdim 3054288943Sdim 3055288943Sdim#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \ 3056309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 3057309124Sdim (__v16sf)(__m512)(B), \ 
3058309124Sdim (__v16sf)(__m512)(C), \ 3059309124Sdim (__mmask16)(U), (int)(R)); }) 3060288943Sdim 3061288943Sdim 3062288943Sdim#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \ 3063309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \ 3064309124Sdim (__v16sf)(__m512)(B), \ 3065309124Sdim (__v16sf)(__m512)(C), \ 3066309124Sdim (__mmask16)(U), (int)(R)); }) 3067288943Sdim 3068288943Sdim 3069288943Sdim#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \ 3070309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ 3071309124Sdim (__v16sf)(__m512)(B), \ 3072309124Sdim (__v16sf)(__m512)(C), \ 3073309124Sdim (__mmask16)(U), (int)(R)); }) 3074288943Sdim 3075288943Sdim 3076288943Sdim#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \ 3077309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 3078309124Sdim (__v16sf)(__m512)(B), \ 3079309124Sdim -(__v16sf)(__m512)(C), \ 3080309124Sdim (__mmask16)-1, (int)(R)); }) 3081288943Sdim 3082288943Sdim 3083288943Sdim#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \ 3084309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 3085309124Sdim (__v16sf)(__m512)(B), \ 3086309124Sdim -(__v16sf)(__m512)(C), \ 3087309124Sdim (__mmask16)(U), (int)(R)); }) 3088288943Sdim 3089288943Sdim 3090288943Sdim#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \ 3091309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ 3092309124Sdim (__v16sf)(__m512)(B), \ 3093309124Sdim -(__v16sf)(__m512)(C), \ 3094309124Sdim (__mmask16)(U), (int)(R)); }) 3095288943Sdim 3096288943Sdim 3097288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3098288943Sdim_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) 3099288943Sdim{ 3100288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3101288943Sdim (__v16sf) __B, 3102288943Sdim (__v16sf) 
__C, 3103288943Sdim (__mmask16) -1, 3104288943Sdim _MM_FROUND_CUR_DIRECTION); 3105288943Sdim} 3106288943Sdim 3107288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3108288943Sdim_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 3109288943Sdim{ 3110288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3111288943Sdim (__v16sf) __B, 3112288943Sdim (__v16sf) __C, 3113288943Sdim (__mmask16) __U, 3114288943Sdim _MM_FROUND_CUR_DIRECTION); 3115288943Sdim} 3116288943Sdim 3117288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3118288943Sdim_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 3119288943Sdim{ 3120288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, 3121288943Sdim (__v16sf) __B, 3122288943Sdim (__v16sf) __C, 3123288943Sdim (__mmask16) __U, 3124288943Sdim _MM_FROUND_CUR_DIRECTION); 3125288943Sdim} 3126288943Sdim 3127288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3128288943Sdim_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 3129288943Sdim{ 3130288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 3131288943Sdim (__v16sf) __B, 3132288943Sdim (__v16sf) __C, 3133288943Sdim (__mmask16) __U, 3134288943Sdim _MM_FROUND_CUR_DIRECTION); 3135288943Sdim} 3136288943Sdim 3137288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3138288943Sdim_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) 3139288943Sdim{ 3140288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3141288943Sdim (__v16sf) __B, 3142288943Sdim -(__v16sf) __C, 3143288943Sdim (__mmask16) -1, 3144288943Sdim _MM_FROUND_CUR_DIRECTION); 3145288943Sdim} 3146288943Sdim 3147288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3148288943Sdim_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 3149288943Sdim{ 3150288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3151288943Sdim 
(__v16sf) __B, 3152288943Sdim -(__v16sf) __C, 3153288943Sdim (__mmask16) __U, 3154288943Sdim _MM_FROUND_CUR_DIRECTION); 3155288943Sdim} 3156288943Sdim 3157288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3158288943Sdim_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 3159288943Sdim{ 3160288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 3161288943Sdim (__v16sf) __B, 3162288943Sdim -(__v16sf) __C, 3163288943Sdim (__mmask16) __U, 3164288943Sdim _MM_FROUND_CUR_DIRECTION); 3165288943Sdim} 3166288943Sdim 3167288943Sdim#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \ 3168309124Sdim (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \ 3169309124Sdim (__v8df)(__m512d)(B), \ 3170309124Sdim (__v8df)(__m512d)(C), \ 3171309124Sdim (__mmask8)(U), (int)(R)); }) 3172288943Sdim 3173288943Sdim 3174288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3175288943Sdim_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 3176288943Sdim{ 3177288943Sdim return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, 3178288943Sdim (__v8df) __B, 3179288943Sdim (__v8df) __C, 3180288943Sdim (__mmask8) __U, 3181288943Sdim _MM_FROUND_CUR_DIRECTION); 3182288943Sdim} 3183288943Sdim 3184288943Sdim#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \ 3185309124Sdim (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \ 3186309124Sdim (__v16sf)(__m512)(B), \ 3187309124Sdim (__v16sf)(__m512)(C), \ 3188309124Sdim (__mmask16)(U), (int)(R)); }) 3189288943Sdim 3190288943Sdim 3191288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3192288943Sdim_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 3193288943Sdim{ 3194288943Sdim return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, 3195288943Sdim (__v16sf) __B, 3196288943Sdim (__v16sf) __C, 3197288943Sdim (__mmask16) __U, 3198288943Sdim _MM_FROUND_CUR_DIRECTION); 3199288943Sdim} 
3200288943Sdim 3201288943Sdim#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \ 3202309124Sdim (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \ 3203309124Sdim (__v8df)(__m512d)(B), \ 3204309124Sdim (__v8df)(__m512d)(C), \ 3205309124Sdim (__mmask8)(U), (int)(R)); }) 3206288943Sdim 3207288943Sdim 3208288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3209288943Sdim_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 3210288943Sdim{ 3211288943Sdim return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, 3212288943Sdim (__v8df) __B, 3213288943Sdim (__v8df) __C, 3214288943Sdim (__mmask8) __U, 3215288943Sdim _MM_FROUND_CUR_DIRECTION); 3216288943Sdim} 3217288943Sdim 3218288943Sdim#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \ 3219309124Sdim (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \ 3220309124Sdim (__v16sf)(__m512)(B), \ 3221309124Sdim (__v16sf)(__m512)(C), \ 3222309124Sdim (__mmask16)(U), (int)(R)); }) 3223288943Sdim 3224288943Sdim 3225288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3226288943Sdim_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 3227288943Sdim{ 3228288943Sdim return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, 3229288943Sdim (__v16sf) __B, 3230288943Sdim (__v16sf) __C, 3231288943Sdim (__mmask16) __U, 3232288943Sdim _MM_FROUND_CUR_DIRECTION); 3233288943Sdim} 3234288943Sdim 3235288943Sdim#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \ 3236309124Sdim (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \ 3237309124Sdim (__v8df)(__m512d)(B), \ 3238309124Sdim (__v8df)(__m512d)(C), \ 3239309124Sdim (__mmask8)(U), (int)(R)); }) 3240288943Sdim 3241288943Sdim 3242288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3243288943Sdim_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 3244288943Sdim{ 3245288943Sdim return (__m512d) 
__builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, 3246288943Sdim (__v8df) __B, 3247288943Sdim (__v8df) __C, 3248288943Sdim (__mmask8) __U, 3249288943Sdim _MM_FROUND_CUR_DIRECTION); 3250288943Sdim} 3251288943Sdim 3252288943Sdim#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \ 3253309124Sdim (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \ 3254309124Sdim (__v16sf)(__m512)(B), \ 3255309124Sdim (__v16sf)(__m512)(C), \ 3256309124Sdim (__mmask16)(U), (int)(R)); }) 3257288943Sdim 3258288943Sdim 3259288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3260288943Sdim_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 3261288943Sdim{ 3262288943Sdim return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, 3263288943Sdim (__v16sf) __B, 3264288943Sdim (__v16sf) __C, 3265288943Sdim (__mmask16) __U, 3266288943Sdim _MM_FROUND_CUR_DIRECTION); 3267288943Sdim} 3268288943Sdim 3269288943Sdim#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \ 3270309124Sdim (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \ 3271309124Sdim (__v8df)(__m512d)(B), \ 3272309124Sdim (__v8df)(__m512d)(C), \ 3273309124Sdim (__mmask8)(U), (int)(R)); }) 3274288943Sdim 3275288943Sdim 3276288943Sdim#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \ 3277309124Sdim (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \ 3278309124Sdim (__v8df)(__m512d)(B), \ 3279309124Sdim (__v8df)(__m512d)(C), \ 3280309124Sdim (__mmask8)(U), (int)(R)); }) 3281288943Sdim 3282288943Sdim 3283288943Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3284288943Sdim_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 3285288943Sdim{ 3286288943Sdim return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, 3287288943Sdim (__v8df) __B, 3288288943Sdim (__v8df) __C, 3289288943Sdim (__mmask8) __U, 3290288943Sdim _MM_FROUND_CUR_DIRECTION); 3291288943Sdim} 3292288943Sdim 3293288943Sdimstatic 
__inline__ __m512d __DEFAULT_FN_ATTRS 3294288943Sdim_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 3295288943Sdim{ 3296288943Sdim return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, 3297288943Sdim (__v8df) __B, 3298288943Sdim (__v8df) __C, 3299288943Sdim (__mmask8) __U, 3300288943Sdim _MM_FROUND_CUR_DIRECTION); 3301288943Sdim} 3302288943Sdim 3303288943Sdim#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \ 3304309124Sdim (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \ 3305309124Sdim (__v16sf)(__m512)(B), \ 3306309124Sdim (__v16sf)(__m512)(C), \ 3307309124Sdim (__mmask16)(U), (int)(R)); }) 3308288943Sdim 3309288943Sdim 3310288943Sdim#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \ 3311309124Sdim (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \ 3312309124Sdim (__v16sf)(__m512)(B), \ 3313309124Sdim (__v16sf)(__m512)(C), \ 3314309124Sdim (__mmask16)(U), (int)(R)); }) 3315288943Sdim 3316288943Sdim 3317288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3318288943Sdim_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 3319288943Sdim{ 3320288943Sdim return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, 3321288943Sdim (__v16sf) __B, 3322288943Sdim (__v16sf) __C, 3323288943Sdim (__mmask16) __U, 3324288943Sdim _MM_FROUND_CUR_DIRECTION); 3325288943Sdim} 3326288943Sdim 3327288943Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3328288943Sdim_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 3329288943Sdim{ 3330288943Sdim return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, 3331288943Sdim (__v16sf) __B, 3332288943Sdim (__v16sf) __C, 3333288943Sdim (__mmask16) __U, 3334288943Sdim _MM_FROUND_CUR_DIRECTION); 3335288943Sdim} 3336288943Sdim 3337288943Sdim 3338288943Sdim 3339277325Sdim/* Vector permutations */ 3340277325Sdim 3341288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 
3342277325Sdim_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) 3343277325Sdim{ 3344277325Sdim return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I 3345277325Sdim /* idx */ , 3346277325Sdim (__v16si) __A, 3347277325Sdim (__v16si) __B, 3348277325Sdim (__mmask16) -1); 3349277325Sdim} 3350309124Sdim 3351309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 3352309124Sdim_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U, 3353309124Sdim __m512i __I, __m512i __B) 3354309124Sdim{ 3355309124Sdim return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I 3356309124Sdim /* idx */ , 3357309124Sdim (__v16si) __A, 3358309124Sdim (__v16si) __B, 3359309124Sdim (__mmask16) __U); 3360309124Sdim} 3361309124Sdim 3362309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 3363309124Sdim_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A, 3364309124Sdim __m512i __I, __m512i __B) 3365309124Sdim{ 3366309124Sdim return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I 3367309124Sdim /* idx */ , 3368309124Sdim (__v16si) __A, 3369309124Sdim (__v16si) __B, 3370309124Sdim (__mmask16) __U); 3371309124Sdim} 3372309124Sdim 3373288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 3374277325Sdim_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) 3375277325Sdim{ 3376277325Sdim return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I 3377277325Sdim /* idx */ , 3378277325Sdim (__v8di) __A, 3379277325Sdim (__v8di) __B, 3380277325Sdim (__mmask8) -1); 3381277325Sdim} 3382277325Sdim 3383309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 3384309124Sdim_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I, 3385309124Sdim __m512i __B) 3386277325Sdim{ 3387309124Sdim return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I 3388309124Sdim /* idx */ , 3389309124Sdim (__v8di) __A, 3390309124Sdim (__v8di) __B, 3391309124Sdim (__mmask8) __U); 3392277325Sdim} 3393309124Sdim 3394309124Sdim 
3395309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 3396309124Sdim_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A, 3397309124Sdim __m512i __I, __m512i __B) 3398277325Sdim{ 3399309124Sdim return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I 3400309124Sdim /* idx */ , 3401309124Sdim (__v8di) __A, 3402309124Sdim (__v8di) __B, 3403309124Sdim (__mmask8) __U); 3404277325Sdim} 3405277325Sdim 3406288943Sdim#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \ 3407314564Sdim (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \ 3408314564Sdim (__v8di)(__m512i)(A), \ 3409314564Sdim ((int)(I) & 0x7) + 0, \ 3410314564Sdim ((int)(I) & 0x7) + 1, \ 3411314564Sdim ((int)(I) & 0x7) + 2, \ 3412314564Sdim ((int)(I) & 0x7) + 3, \ 3413314564Sdim ((int)(I) & 0x7) + 4, \ 3414314564Sdim ((int)(I) & 0x7) + 5, \ 3415314564Sdim ((int)(I) & 0x7) + 6, \ 3416314564Sdim ((int)(I) & 0x7) + 7); }) 3417277325Sdim 3418309124Sdim#define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\ 3419314564Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 3420314564Sdim (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ 3421314564Sdim (__v8di)(__m512i)(W)); }) 3422309124Sdim 3423309124Sdim#define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\ 3424314564Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 3425314564Sdim (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ 3426314564Sdim (__v8di)_mm512_setzero_si512()); }) 3427309124Sdim 3428288943Sdim#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \ 3429314564Sdim (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \ 3430314564Sdim (__v16si)(__m512i)(A), \ 3431314564Sdim ((int)(I) & 0xf) + 0, \ 3432314564Sdim ((int)(I) & 0xf) + 1, \ 3433314564Sdim ((int)(I) & 0xf) + 2, \ 3434314564Sdim ((int)(I) & 0xf) + 3, \ 3435314564Sdim ((int)(I) & 0xf) + 4, \ 3436314564Sdim ((int)(I) & 0xf) + 5, \ 3437314564Sdim ((int)(I) & 0xf) + 6, \ 3438314564Sdim ((int)(I) & 0xf) + 7, \ 3439314564Sdim ((int)(I) & 
0xf) + 8, \ 3440314564Sdim ((int)(I) & 0xf) + 9, \ 3441314564Sdim ((int)(I) & 0xf) + 10, \ 3442314564Sdim ((int)(I) & 0xf) + 11, \ 3443314564Sdim ((int)(I) & 0xf) + 12, \ 3444314564Sdim ((int)(I) & 0xf) + 13, \ 3445314564Sdim ((int)(I) & 0xf) + 14, \ 3446314564Sdim ((int)(I) & 0xf) + 15); }) 3447277325Sdim 3448309124Sdim#define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\ 3449314564Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 3450314564Sdim (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ 3451314564Sdim (__v16si)(__m512i)(W)); }) 3452309124Sdim 3453309124Sdim#define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\ 3454314564Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 3455314564Sdim (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ 3456314564Sdim (__v16si)_mm512_setzero_si512()); }) 3457288943Sdim/* Vector Extract */ 3458288943Sdim 3459314564Sdim#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \ 3460314564Sdim (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A), \ 3461314564Sdim (__v8df)_mm512_undefined_pd(), \ 3462314564Sdim ((I) & 1) ? 4 : 0, \ 3463314564Sdim ((I) & 1) ? 5 : 1, \ 3464314564Sdim ((I) & 1) ? 6 : 2, \ 3465314564Sdim ((I) & 1) ? 
7 : 3); }) 3466288943Sdim 3467309124Sdim#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\ 3468314564Sdim (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 3469314564Sdim (__v4df)_mm512_extractf64x4_pd((A), (imm)), \ 3470314564Sdim (__v4df)(W)); }) 3471309124Sdim 3472309124Sdim#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\ 3473314564Sdim (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 3474314564Sdim (__v4df)_mm512_extractf64x4_pd((A), (imm)), \ 3475314564Sdim (__v4df)_mm256_setzero_pd()); }) 3476309124Sdim 3477314564Sdim#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \ 3478314564Sdim (__m128)__builtin_shufflevector((__v16sf)(__m512)(A), \ 3479314564Sdim (__v16sf)_mm512_undefined_ps(), \ 3480314564Sdim 0 + ((I) & 0x3) * 4, \ 3481314564Sdim 1 + ((I) & 0x3) * 4, \ 3482314564Sdim 2 + ((I) & 0x3) * 4, \ 3483314564Sdim 3 + ((I) & 0x3) * 4); }) 3484288943Sdim 3485309124Sdim#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\ 3486314564Sdim (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 3487314564Sdim (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \ 3488314564Sdim (__v4sf)(W)); }) 3489309124Sdim 3490309124Sdim#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\ 3491314564Sdim (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 3492314564Sdim (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \ 3493314564Sdim (__v4sf)_mm_setzero_ps()); }) 3494314564Sdim 3495277325Sdim/* Vector Blend */ 3496277325Sdim 3497288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 3498277325Sdim_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) 3499277325Sdim{ 3500309124Sdim return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, 3501277325Sdim (__v8df) __W, 3502309124Sdim (__v8df) __A); 3503277325Sdim} 3504277325Sdim 3505288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 3506277325Sdim_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) 3507277325Sdim{ 3508309124Sdim return (__m512) 
__builtin_ia32_selectps_512 ((__mmask16) __U, 3509277325Sdim (__v16sf) __W, 3510309124Sdim (__v16sf) __A); 3511277325Sdim} 3512277325Sdim 3513288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 3514277325Sdim_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) 3515277325Sdim{ 3516309124Sdim return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, 3517277325Sdim (__v8di) __W, 3518309124Sdim (__v8di) __A); 3519277325Sdim} 3520277325Sdim 3521288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 3522277325Sdim_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) 3523277325Sdim{ 3524309124Sdim return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, 3525277325Sdim (__v16si) __W, 3526309124Sdim (__v16si) __A); 3527277325Sdim} 3528277325Sdim 3529277325Sdim/* Compare */ 3530277325Sdim 3531288943Sdim#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \ 3532288943Sdim (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 3533309124Sdim (__v16sf)(__m512)(B), (int)(P), \ 3534309124Sdim (__mmask16)-1, (int)(R)); }) 3535277325Sdim 3536288943Sdim#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \ 3537288943Sdim (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 3538309124Sdim (__v16sf)(__m512)(B), (int)(P), \ 3539309124Sdim (__mmask16)(U), (int)(R)); }) 3540277325Sdim 3541288943Sdim#define _mm512_cmp_ps_mask(A, B, P) \ 3542288943Sdim _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3543288943Sdim#define _mm512_mask_cmp_ps_mask(U, A, B, P) \ 3544288943Sdim _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3545288943Sdim 3546314564Sdim#define _mm512_cmpeq_ps_mask(A, B) \ 3547314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ) 3548314564Sdim#define _mm512_mask_cmpeq_ps_mask(k, A, B) \ 3549314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ) 3550314564Sdim 3551314564Sdim#define _mm512_cmplt_ps_mask(A, B) \ 3552314564Sdim _mm512_cmp_ps_mask((A), (B), 
_CMP_LT_OS) 3553314564Sdim#define _mm512_mask_cmplt_ps_mask(k, A, B) \ 3554314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS) 3555314564Sdim 3556314564Sdim#define _mm512_cmple_ps_mask(A, B) \ 3557314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS) 3558314564Sdim#define _mm512_mask_cmple_ps_mask(k, A, B) \ 3559314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS) 3560314564Sdim 3561314564Sdim#define _mm512_cmpunord_ps_mask(A, B) \ 3562314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q) 3563314564Sdim#define _mm512_mask_cmpunord_ps_mask(k, A, B) \ 3564314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q) 3565314564Sdim 3566314564Sdim#define _mm512_cmpneq_ps_mask(A, B) \ 3567314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ) 3568314564Sdim#define _mm512_mask_cmpneq_ps_mask(k, A, B) \ 3569314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ) 3570314564Sdim 3571314564Sdim#define _mm512_cmpnlt_ps_mask(A, B) \ 3572314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US) 3573314564Sdim#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \ 3574314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US) 3575314564Sdim 3576314564Sdim#define _mm512_cmpnle_ps_mask(A, B) \ 3577314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US) 3578314564Sdim#define _mm512_mask_cmpnle_ps_mask(k, A, B) \ 3579314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US) 3580314564Sdim 3581314564Sdim#define _mm512_cmpord_ps_mask(A, B) \ 3582314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q) 3583314564Sdim#define _mm512_mask_cmpord_ps_mask(k, A, B) \ 3584314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q) 3585314564Sdim 3586288943Sdim#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \ 3587288943Sdim (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ 3588309124Sdim (__v8df)(__m512d)(B), (int)(P), \ 3589309124Sdim (__mmask8)-1, (int)(R)); }) 3590288943Sdim 3591288943Sdim#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) 
__extension__ ({ \ 3592288943Sdim (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ 3593309124Sdim (__v8df)(__m512d)(B), (int)(P), \ 3594309124Sdim (__mmask8)(U), (int)(R)); }) 3595288943Sdim 3596288943Sdim#define _mm512_cmp_pd_mask(A, B, P) \ 3597288943Sdim _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3598288943Sdim#define _mm512_mask_cmp_pd_mask(U, A, B, P) \ 3599288943Sdim _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3600288943Sdim 3601314564Sdim#define _mm512_cmpeq_pd_mask(A, B) \ 3602314564Sdim _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ) 3603314564Sdim#define _mm512_mask_cmpeq_pd_mask(k, A, B) \ 3604314564Sdim _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ) 3605314564Sdim 3606314564Sdim#define _mm512_cmplt_pd_mask(A, B) \ 3607314564Sdim _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS) 3608314564Sdim#define _mm512_mask_cmplt_pd_mask(k, A, B) \ 3609314564Sdim _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS) 3610314564Sdim 3611314564Sdim#define _mm512_cmple_pd_mask(A, B) \ 3612314564Sdim _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS) 3613314564Sdim#define _mm512_mask_cmple_pd_mask(k, A, B) \ 3614314564Sdim _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS) 3615314564Sdim 3616314564Sdim#define _mm512_cmpunord_pd_mask(A, B) \ 3617314564Sdim _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q) 3618314564Sdim#define _mm512_mask_cmpunord_pd_mask(k, A, B) \ 3619314564Sdim _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q) 3620314564Sdim 3621314564Sdim#define _mm512_cmpneq_pd_mask(A, B) \ 3622314564Sdim _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ) 3623314564Sdim#define _mm512_mask_cmpneq_pd_mask(k, A, B) \ 3624314564Sdim _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ) 3625314564Sdim 3626314564Sdim#define _mm512_cmpnlt_pd_mask(A, B) \ 3627314564Sdim _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US) 3628314564Sdim#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \ 3629314564Sdim _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US) 
3630314564Sdim 3631314564Sdim#define _mm512_cmpnle_pd_mask(A, B) \ 3632314564Sdim _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US) 3633314564Sdim#define _mm512_mask_cmpnle_pd_mask(k, A, B) \ 3634314564Sdim _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US) 3635314564Sdim 3636314564Sdim#define _mm512_cmpord_pd_mask(A, B) \ 3637314564Sdim _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q) 3638314564Sdim#define _mm512_mask_cmpord_pd_mask(k, A, B) \ 3639314564Sdim _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q) 3640314564Sdim 3641277325Sdim/* Conversion */ 3642277325Sdim 3643309124Sdim#define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \ 3644309124Sdim (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ 3645309124Sdim (__v16si)_mm512_undefined_epi32(), \ 3646309124Sdim (__mmask16)-1, (int)(R)); }) 3647309124Sdim 3648309124Sdim#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \ 3649309124Sdim (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ 3650309124Sdim (__v16si)(__m512i)(W), \ 3651309124Sdim (__mmask16)(U), (int)(R)); }) 3652309124Sdim 3653309124Sdim#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \ 3654309124Sdim (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ 3655309124Sdim (__v16si)_mm512_setzero_si512(), \ 3656309124Sdim (__mmask16)(U), (int)(R)); }) 3657309124Sdim 3658309124Sdim 3659288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 3660277325Sdim_mm512_cvttps_epu32(__m512 __A) 3661277325Sdim{ 3662277325Sdim return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 3663277325Sdim (__v16si) 3664277325Sdim _mm512_setzero_si512 (), 3665277325Sdim (__mmask16) -1, 3666277325Sdim _MM_FROUND_CUR_DIRECTION); 3667277325Sdim} 3668277325Sdim 3669309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 3670309124Sdim_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) 3671309124Sdim{ 3672309124Sdim return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 
3673309124Sdim (__v16si) __W, 3674309124Sdim (__mmask16) __U, 3675309124Sdim _MM_FROUND_CUR_DIRECTION); 3676309124Sdim} 3677309124Sdim 3678309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 3679309124Sdim_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) 3680309124Sdim{ 3681309124Sdim return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 3682309124Sdim (__v16si) _mm512_setzero_si512 (), 3683309124Sdim (__mmask16) __U, 3684309124Sdim _MM_FROUND_CUR_DIRECTION); 3685309124Sdim} 3686309124Sdim 3687288943Sdim#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \ 3688309124Sdim (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 3689288943Sdim (__v16sf)_mm512_setzero_ps(), \ 3690309124Sdim (__mmask16)-1, (int)(R)); }) 3691277325Sdim 3692309124Sdim#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \ 3693309124Sdim (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 3694309124Sdim (__v16sf)(__m512)(W), \ 3695309124Sdim (__mmask16)(U), (int)(R)); }) 3696309124Sdim 3697309124Sdim#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \ 3698309124Sdim (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 3699309124Sdim (__v16sf)_mm512_setzero_ps(), \ 3700309124Sdim (__mmask16)(U), (int)(R)); }) 3701309124Sdim 3702288943Sdim#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \ 3703309124Sdim (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 3704288943Sdim (__v16sf)_mm512_setzero_ps(), \ 3705309124Sdim (__mmask16)-1, (int)(R)); }) 3706277325Sdim 3707309124Sdim#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \ 3708309124Sdim (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 3709309124Sdim (__v16sf)(__m512)(W), \ 3710309124Sdim (__mmask16)(U), (int)(R)); }) 3711309124Sdim 3712309124Sdim#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \ 3713309124Sdim (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 
3714309124Sdim (__v16sf)_mm512_setzero_ps(), \ 3715309124Sdim (__mmask16)(U), (int)(R)); }) 3716309124Sdim 3717309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3718309124Sdim_mm512_cvtepu32_ps (__m512i __A) 3719309124Sdim{ 3720309124Sdim return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 3721309124Sdim (__v16sf) _mm512_undefined_ps (), 3722309124Sdim (__mmask16) -1, 3723309124Sdim _MM_FROUND_CUR_DIRECTION); 3724309124Sdim} 3725309124Sdim 3726309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3727309124Sdim_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) 3728309124Sdim{ 3729309124Sdim return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 3730309124Sdim (__v16sf) __W, 3731309124Sdim (__mmask16) __U, 3732309124Sdim _MM_FROUND_CUR_DIRECTION); 3733309124Sdim} 3734309124Sdim 3735309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3736309124Sdim_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) 3737309124Sdim{ 3738309124Sdim return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 3739309124Sdim (__v16sf) _mm512_setzero_ps (), 3740309124Sdim (__mmask16) __U, 3741309124Sdim _MM_FROUND_CUR_DIRECTION); 3742309124Sdim} 3743309124Sdim 3744288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 3745277325Sdim_mm512_cvtepi32_pd(__m256i __A) 3746277325Sdim{ 3747314564Sdim return (__m512d)__builtin_convertvector((__v8si)__A, __v8df); 3748277325Sdim} 3749277325Sdim 3750309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3751309124Sdim_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) 3752309124Sdim{ 3753314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 3754314564Sdim (__v8df)_mm512_cvtepi32_pd(__A), 3755314564Sdim (__v8df)__W); 3756309124Sdim} 3757309124Sdim 3758309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3759309124Sdim_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) 3760309124Sdim{ 3761314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 3762314564Sdim 
(__v8df)_mm512_cvtepi32_pd(__A), 3763314564Sdim (__v8df)_mm512_setzero_pd()); 3764309124Sdim} 3765309124Sdim 3766314564Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3767314564Sdim_mm512_cvtepi32lo_pd(__m512i __A) 3768314564Sdim{ 3769314564Sdim return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A)); 3770314564Sdim} 3771314564Sdim 3772314564Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3773314564Sdim_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) 3774314564Sdim{ 3775314564Sdim return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A)); 3776314564Sdim} 3777314564Sdim 3778309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3779309124Sdim_mm512_cvtepi32_ps (__m512i __A) 3780309124Sdim{ 3781309124Sdim return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 3782309124Sdim (__v16sf) _mm512_undefined_ps (), 3783309124Sdim (__mmask16) -1, 3784309124Sdim _MM_FROUND_CUR_DIRECTION); 3785309124Sdim} 3786309124Sdim 3787309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3788309124Sdim_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) 3789309124Sdim{ 3790309124Sdim return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 3791309124Sdim (__v16sf) __W, 3792309124Sdim (__mmask16) __U, 3793309124Sdim _MM_FROUND_CUR_DIRECTION); 3794309124Sdim} 3795309124Sdim 3796309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3797309124Sdim_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) 3798309124Sdim{ 3799309124Sdim return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 3800309124Sdim (__v16sf) _mm512_setzero_ps (), 3801309124Sdim (__mmask16) __U, 3802309124Sdim _MM_FROUND_CUR_DIRECTION); 3803309124Sdim} 3804309124Sdim 3805288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 3806277325Sdim_mm512_cvtepu32_pd(__m256i __A) 3807277325Sdim{ 3808314564Sdim return (__m512d)__builtin_convertvector((__v8su)__A, __v8df); 3809277325Sdim} 3810277325Sdim 3811309124Sdimstatic __inline__ __m512d 
__DEFAULT_FN_ATTRS 3812309124Sdim_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) 3813309124Sdim{ 3814314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 3815314564Sdim (__v8df)_mm512_cvtepu32_pd(__A), 3816314564Sdim (__v8df)__W); 3817309124Sdim} 3818309124Sdim 3819309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3820309124Sdim_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) 3821309124Sdim{ 3822314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 3823314564Sdim (__v8df)_mm512_cvtepu32_pd(__A), 3824314564Sdim (__v8df)_mm512_setzero_pd()); 3825309124Sdim} 3826309124Sdim 3827314564Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3828314564Sdim_mm512_cvtepu32lo_pd(__m512i __A) 3829314564Sdim{ 3830314564Sdim return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A)); 3831314564Sdim} 3832314564Sdim 3833314564Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 3834314564Sdim_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) 3835314564Sdim{ 3836314564Sdim return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A)); 3837314564Sdim} 3838314564Sdim 3839288943Sdim#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \ 3840309124Sdim (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3841288943Sdim (__v8sf)_mm256_setzero_ps(), \ 3842309124Sdim (__mmask8)-1, (int)(R)); }) 3843277325Sdim 3844309124Sdim#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \ 3845309124Sdim (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3846309124Sdim (__v8sf)(__m256)(W), (__mmask8)(U), \ 3847309124Sdim (int)(R)); }) 3848309124Sdim 3849309124Sdim#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \ 3850309124Sdim (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3851309124Sdim (__v8sf)_mm256_setzero_ps(), \ 3852309124Sdim (__mmask8)(U), (int)(R)); }) 3853309124Sdim 3854309124Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS 
3855309124Sdim_mm512_cvtpd_ps (__m512d __A) 3856309124Sdim{ 3857309124Sdim return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 3858309124Sdim (__v8sf) _mm256_undefined_ps (), 3859309124Sdim (__mmask8) -1, 3860309124Sdim _MM_FROUND_CUR_DIRECTION); 3861309124Sdim} 3862309124Sdim 3863309124Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS 3864309124Sdim_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) 3865309124Sdim{ 3866309124Sdim return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 3867309124Sdim (__v8sf) __W, 3868309124Sdim (__mmask8) __U, 3869309124Sdim _MM_FROUND_CUR_DIRECTION); 3870309124Sdim} 3871309124Sdim 3872309124Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS 3873309124Sdim_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) 3874309124Sdim{ 3875309124Sdim return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 3876309124Sdim (__v8sf) _mm256_setzero_ps (), 3877309124Sdim (__mmask8) __U, 3878309124Sdim _MM_FROUND_CUR_DIRECTION); 3879309124Sdim} 3880309124Sdim 3881314564Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3882314564Sdim_mm512_cvtpd_pslo (__m512d __A) 3883314564Sdim{ 3884314564Sdim return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A), 3885314564Sdim (__v8sf) _mm256_setzero_ps (), 3886314564Sdim 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 3887314564Sdim} 3888314564Sdim 3889314564Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3890314564Sdim_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A) 3891314564Sdim{ 3892314564Sdim return (__m512) __builtin_shufflevector ( 3893314564Sdim (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W), 3894314564Sdim __U, __A), 3895314564Sdim (__v8sf) _mm256_setzero_ps (), 3896314564Sdim 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 3897314564Sdim} 3898314564Sdim 3899309124Sdim#define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \ 3900309124Sdim (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3901309124Sdim 
(__v16hi)_mm256_undefined_si256(), \ 3902309124Sdim (__mmask16)-1); }) 3903309124Sdim 3904309124Sdim#define _mm512_mask_cvt_roundps_ph(U, W, A, I) __extension__ ({ \ 3905309124Sdim (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3906309124Sdim (__v16hi)(__m256i)(U), \ 3907309124Sdim (__mmask16)(W)); }) 3908309124Sdim 3909309124Sdim#define _mm512_maskz_cvt_roundps_ph(W, A, I) __extension__ ({ \ 3910309124Sdim (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3911309124Sdim (__v16hi)_mm256_setzero_si256(), \ 3912309124Sdim (__mmask16)(W)); }) 3913309124Sdim 3914288943Sdim#define _mm512_cvtps_ph(A, I) __extension__ ({ \ 3915309124Sdim (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3916288943Sdim (__v16hi)_mm256_setzero_si256(), \ 3917309124Sdim (__mmask16)-1); }) 3918288943Sdim 3919309124Sdim#define _mm512_mask_cvtps_ph(U, W, A, I) __extension__ ({ \ 3920309124Sdim (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3921309124Sdim (__v16hi)(__m256i)(U), \ 3922309124Sdim (__mmask16)(W)); }) 3923309124Sdim 3924309124Sdim#define _mm512_maskz_cvtps_ph(W, A, I) __extension__ ({\ 3925309124Sdim (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3926309124Sdim (__v16hi)_mm256_setzero_si256(), \ 3927309124Sdim (__mmask16)(W)); }) 3928309124Sdim 3929309124Sdim#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \ 3930309124Sdim (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 3931309124Sdim (__v16sf)_mm512_undefined_ps(), \ 3932309124Sdim (__mmask16)-1, (int)(R)); }) 3933309124Sdim 3934309124Sdim#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \ 3935309124Sdim (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 3936309124Sdim (__v16sf)(__m512)(W), \ 3937309124Sdim (__mmask16)(U), (int)(R)); }) 3938309124Sdim 3939309124Sdim#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \ 3940309124Sdim 
(__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 3941309124Sdim (__v16sf)_mm512_setzero_ps(), \ 3942309124Sdim (__mmask16)(U), (int)(R)); }) 3943309124Sdim 3944309124Sdim 3945288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 3946277325Sdim_mm512_cvtph_ps(__m256i __A) 3947277325Sdim{ 3948277325Sdim return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 3949277325Sdim (__v16sf) 3950277325Sdim _mm512_setzero_ps (), 3951277325Sdim (__mmask16) -1, 3952277325Sdim _MM_FROUND_CUR_DIRECTION); 3953277325Sdim} 3954277325Sdim 3955309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3956309124Sdim_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) 3957277325Sdim{ 3958309124Sdim return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 3959309124Sdim (__v16sf) __W, 3960309124Sdim (__mmask16) __U, 3961309124Sdim _MM_FROUND_CUR_DIRECTION); 3962277325Sdim} 3963277325Sdim 3964309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 3965309124Sdim_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) 3966309124Sdim{ 3967309124Sdim return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 3968309124Sdim (__v16sf) _mm512_setzero_ps (), 3969309124Sdim (__mmask16) __U, 3970309124Sdim _MM_FROUND_CUR_DIRECTION); 3971309124Sdim} 3972309124Sdim 3973309124Sdim#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \ 3974309124Sdim (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 3975309124Sdim (__v8si)_mm256_setzero_si256(), \ 3976309124Sdim (__mmask8)-1, (int)(R)); }) 3977309124Sdim 3978309124Sdim#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \ 3979309124Sdim (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 3980309124Sdim (__v8si)(__m256i)(W), \ 3981309124Sdim (__mmask8)(U), (int)(R)); }) 3982309124Sdim 3983309124Sdim#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \ 3984309124Sdim (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 3985309124Sdim 
(__v8si)_mm256_setzero_si256(), \ 3986309124Sdim (__mmask8)(U), (int)(R)); }) 3987309124Sdim 3988288943Sdimstatic __inline __m256i __DEFAULT_FN_ATTRS 3989296417Sdim_mm512_cvttpd_epi32(__m512d __a) 3990277325Sdim{ 3991296417Sdim return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a, 3992277325Sdim (__v8si)_mm256_setzero_si256(), 3993277325Sdim (__mmask8) -1, 3994277325Sdim _MM_FROUND_CUR_DIRECTION); 3995277325Sdim} 3996277325Sdim 3997309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 3998309124Sdim_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) 3999309124Sdim{ 4000309124Sdim return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 4001309124Sdim (__v8si) __W, 4002309124Sdim (__mmask8) __U, 4003309124Sdim _MM_FROUND_CUR_DIRECTION); 4004309124Sdim} 4005277325Sdim 4006309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 4007309124Sdim_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) 4008309124Sdim{ 4009309124Sdim return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 4010309124Sdim (__v8si) _mm256_setzero_si256 (), 4011309124Sdim (__mmask8) __U, 4012309124Sdim _MM_FROUND_CUR_DIRECTION); 4013309124Sdim} 4014309124Sdim 4015288943Sdim#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \ 4016309124Sdim (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 4017288943Sdim (__v16si)_mm512_setzero_si512(), \ 4018309124Sdim (__mmask16)-1, (int)(R)); }) 4019277325Sdim 4020309124Sdim#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \ 4021309124Sdim (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 4022309124Sdim (__v16si)(__m512i)(W), \ 4023309124Sdim (__mmask16)(U), (int)(R)); }) 4024309124Sdim 4025309124Sdim#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \ 4026309124Sdim (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 4027309124Sdim (__v16si)_mm512_setzero_si512(), \ 4028309124Sdim (__mmask16)(U), (int)(R)); }) 4029309124Sdim 
4030309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4031309124Sdim_mm512_cvttps_epi32(__m512 __a) 4032309124Sdim{ 4033309124Sdim return (__m512i) 4034309124Sdim __builtin_ia32_cvttps2dq512_mask((__v16sf) __a, 4035309124Sdim (__v16si) _mm512_setzero_si512 (), 4036309124Sdim (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); 4037309124Sdim} 4038309124Sdim 4039309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4040309124Sdim_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) 4041309124Sdim{ 4042309124Sdim return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 4043309124Sdim (__v16si) __W, 4044309124Sdim (__mmask16) __U, 4045309124Sdim _MM_FROUND_CUR_DIRECTION); 4046309124Sdim} 4047309124Sdim 4048309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4049309124Sdim_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) 4050309124Sdim{ 4051309124Sdim return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 4052309124Sdim (__v16si) _mm512_setzero_si512 (), 4053309124Sdim (__mmask16) __U, 4054309124Sdim _MM_FROUND_CUR_DIRECTION); 4055309124Sdim} 4056309124Sdim 4057288943Sdim#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \ 4058309124Sdim (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 4059288943Sdim (__v16si)_mm512_setzero_si512(), \ 4060309124Sdim (__mmask16)-1, (int)(R)); }) 4061288943Sdim 4062309124Sdim#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \ 4063309124Sdim (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 4064309124Sdim (__v16si)(__m512i)(W), \ 4065309124Sdim (__mmask16)(U), (int)(R)); }) 4066309124Sdim 4067309124Sdim#define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \ 4068309124Sdim (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 4069309124Sdim (__v16si)_mm512_setzero_si512(), \ 4070309124Sdim (__mmask16)(U), (int)(R)); }) 4071309124Sdim 4072309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4073309124Sdim_mm512_cvtps_epi32 (__m512 
__A) 4074309124Sdim{ 4075309124Sdim return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 4076309124Sdim (__v16si) _mm512_undefined_epi32 (), 4077309124Sdim (__mmask16) -1, 4078309124Sdim _MM_FROUND_CUR_DIRECTION); 4079309124Sdim} 4080309124Sdim 4081309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4082309124Sdim_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) 4083309124Sdim{ 4084309124Sdim return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 4085309124Sdim (__v16si) __W, 4086309124Sdim (__mmask16) __U, 4087309124Sdim _MM_FROUND_CUR_DIRECTION); 4088309124Sdim} 4089309124Sdim 4090309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4091309124Sdim_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) 4092309124Sdim{ 4093309124Sdim return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 4094309124Sdim (__v16si) 4095309124Sdim _mm512_setzero_si512 (), 4096309124Sdim (__mmask16) __U, 4097309124Sdim _MM_FROUND_CUR_DIRECTION); 4098309124Sdim} 4099309124Sdim 4100288943Sdim#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \ 4101309124Sdim (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 4102288943Sdim (__v8si)_mm256_setzero_si256(), \ 4103309124Sdim (__mmask8)-1, (int)(R)); }) 4104288943Sdim 4105309124Sdim#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \ 4106309124Sdim (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 4107309124Sdim (__v8si)(__m256i)(W), \ 4108309124Sdim (__mmask8)(U), (int)(R)); }) 4109309124Sdim 4110309124Sdim#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \ 4111309124Sdim (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 4112309124Sdim (__v8si)_mm256_setzero_si256(), \ 4113309124Sdim (__mmask8)(U), (int)(R)); }) 4114309124Sdim 4115309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 4116309124Sdim_mm512_cvtpd_epi32 (__m512d __A) 4117309124Sdim{ 4118309124Sdim return (__m256i) __builtin_ia32_cvtpd2dq512_mask 
((__v8df) __A, 4119309124Sdim (__v8si) 4120309124Sdim _mm256_undefined_si256 (), 4121309124Sdim (__mmask8) -1, 4122309124Sdim _MM_FROUND_CUR_DIRECTION); 4123309124Sdim} 4124309124Sdim 4125309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 4126309124Sdim_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) 4127309124Sdim{ 4128309124Sdim return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 4129309124Sdim (__v8si) __W, 4130309124Sdim (__mmask8) __U, 4131309124Sdim _MM_FROUND_CUR_DIRECTION); 4132309124Sdim} 4133309124Sdim 4134309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 4135309124Sdim_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) 4136309124Sdim{ 4137309124Sdim return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 4138309124Sdim (__v8si) 4139309124Sdim _mm256_setzero_si256 (), 4140309124Sdim (__mmask8) __U, 4141309124Sdim _MM_FROUND_CUR_DIRECTION); 4142309124Sdim} 4143309124Sdim 4144288943Sdim#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \ 4145309124Sdim (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4146288943Sdim (__v16si)_mm512_setzero_si512(), \ 4147309124Sdim (__mmask16)-1, (int)(R)); }) 4148288943Sdim 4149309124Sdim#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \ 4150309124Sdim (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4151309124Sdim (__v16si)(__m512i)(W), \ 4152309124Sdim (__mmask16)(U), (int)(R)); }) 4153309124Sdim 4154309124Sdim#define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \ 4155309124Sdim (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4156309124Sdim (__v16si)_mm512_setzero_si512(), \ 4157309124Sdim (__mmask16)(U), (int)(R)); }) 4158309124Sdim 4159309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4160309124Sdim_mm512_cvtps_epu32 ( __m512 __A) 4161309124Sdim{ 4162309124Sdim return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\ 4163309124Sdim (__v16si)\ 4164309124Sdim 
_mm512_undefined_epi32 (),\ 4165309124Sdim (__mmask16) -1,\ 4166309124Sdim _MM_FROUND_CUR_DIRECTION);\ 4167309124Sdim} 4168309124Sdim 4169309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4170309124Sdim_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) 4171309124Sdim{ 4172309124Sdim return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 4173309124Sdim (__v16si) __W, 4174309124Sdim (__mmask16) __U, 4175309124Sdim _MM_FROUND_CUR_DIRECTION); 4176309124Sdim} 4177309124Sdim 4178309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4179309124Sdim_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A) 4180309124Sdim{ 4181309124Sdim return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 4182309124Sdim (__v16si) 4183309124Sdim _mm512_setzero_si512 (), 4184309124Sdim (__mmask16) __U , 4185309124Sdim _MM_FROUND_CUR_DIRECTION); 4186309124Sdim} 4187309124Sdim 4188288943Sdim#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \ 4189309124Sdim (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4190288943Sdim (__v8si)_mm256_setzero_si256(), \ 4191309124Sdim (__mmask8)-1, (int)(R)); }) 4192288943Sdim 4193309124Sdim#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \ 4194309124Sdim (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4195309124Sdim (__v8si)(W), \ 4196309124Sdim (__mmask8)(U), (int)(R)); }) 4197309124Sdim 4198309124Sdim#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \ 4199309124Sdim (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4200309124Sdim (__v8si)_mm256_setzero_si256(), \ 4201309124Sdim (__mmask8)(U), (int)(R)); }) 4202309124Sdim 4203309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 4204309124Sdim_mm512_cvtpd_epu32 (__m512d __A) 4205309124Sdim{ 4206309124Sdim return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4207309124Sdim (__v8si) 4208309124Sdim _mm256_undefined_si256 (), 4209309124Sdim (__mmask8) -1, 4210309124Sdim 
_MM_FROUND_CUR_DIRECTION); 4211309124Sdim} 4212309124Sdim 4213309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 4214309124Sdim_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 4215309124Sdim{ 4216309124Sdim return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4217309124Sdim (__v8si) __W, 4218309124Sdim (__mmask8) __U, 4219309124Sdim _MM_FROUND_CUR_DIRECTION); 4220309124Sdim} 4221309124Sdim 4222309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 4223309124Sdim_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A) 4224309124Sdim{ 4225309124Sdim return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4226309124Sdim (__v8si) 4227309124Sdim _mm256_setzero_si256 (), 4228309124Sdim (__mmask8) __U, 4229309124Sdim _MM_FROUND_CUR_DIRECTION); 4230309124Sdim} 4231309124Sdim 4232277325Sdim/* Unpack and Interleave */ 4233309124Sdim 4234288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 4235277325Sdim_mm512_unpackhi_pd(__m512d __a, __m512d __b) 4236277325Sdim{ 4237309124Sdim return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 4238309124Sdim 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); 4239277325Sdim} 4240277325Sdim 4241309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 4242309124Sdim_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 4243309124Sdim{ 4244309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 4245309124Sdim (__v8df)_mm512_unpackhi_pd(__A, __B), 4246309124Sdim (__v8df)__W); 4247309124Sdim} 4248309124Sdim 4249309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 4250309124Sdim_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B) 4251309124Sdim{ 4252309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 4253309124Sdim (__v8df)_mm512_unpackhi_pd(__A, __B), 4254309124Sdim (__v8df)_mm512_setzero_pd()); 4255309124Sdim} 4256309124Sdim 4257288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 4258277325Sdim_mm512_unpacklo_pd(__m512d __a, __m512d __b) 
4259277325Sdim{ 4260309124Sdim return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 4261309124Sdim 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); 4262277325Sdim} 4263277325Sdim 4264309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 4265309124Sdim_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 4266309124Sdim{ 4267309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 4268309124Sdim (__v8df)_mm512_unpacklo_pd(__A, __B), 4269309124Sdim (__v8df)__W); 4270309124Sdim} 4271309124Sdim 4272309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 4273309124Sdim_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) 4274309124Sdim{ 4275309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 4276309124Sdim (__v8df)_mm512_unpacklo_pd(__A, __B), 4277309124Sdim (__v8df)_mm512_setzero_pd()); 4278309124Sdim} 4279309124Sdim 4280288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 4281277325Sdim_mm512_unpackhi_ps(__m512 __a, __m512 __b) 4282277325Sdim{ 4283309124Sdim return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, 4284309124Sdim 2, 18, 3, 19, 4285309124Sdim 2+4, 18+4, 3+4, 19+4, 4286309124Sdim 2+8, 18+8, 3+8, 19+8, 4287309124Sdim 2+12, 18+12, 3+12, 19+12); 4288277325Sdim} 4289277325Sdim 4290309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 4291309124Sdim_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 4292309124Sdim{ 4293309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, 4294309124Sdim (__v16sf)_mm512_unpackhi_ps(__A, __B), 4295309124Sdim (__v16sf)__W); 4296309124Sdim} 4297309124Sdim 4298309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 4299309124Sdim_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) 4300309124Sdim{ 4301309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, 4302309124Sdim (__v16sf)_mm512_unpackhi_ps(__A, __B), 4303309124Sdim (__v16sf)_mm512_setzero_ps()); 4304309124Sdim} 4305309124Sdim 
4306288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 4307277325Sdim_mm512_unpacklo_ps(__m512 __a, __m512 __b) 4308277325Sdim{ 4309309124Sdim return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, 4310309124Sdim 0, 16, 1, 17, 4311309124Sdim 0+4, 16+4, 1+4, 17+4, 4312309124Sdim 0+8, 16+8, 1+8, 17+8, 4313309124Sdim 0+12, 16+12, 1+12, 17+12); 4314277325Sdim} 4315277325Sdim 4316309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 4317309124Sdim_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 4318309124Sdim{ 4319309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, 4320309124Sdim (__v16sf)_mm512_unpacklo_ps(__A, __B), 4321309124Sdim (__v16sf)__W); 4322309124Sdim} 4323309124Sdim 4324309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 4325309124Sdim_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B) 4326309124Sdim{ 4327309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, 4328309124Sdim (__v16sf)_mm512_unpacklo_ps(__A, __B), 4329309124Sdim (__v16sf)_mm512_setzero_ps()); 4330309124Sdim} 4331309124Sdim 4332309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4333309124Sdim_mm512_unpackhi_epi32(__m512i __A, __m512i __B) 4334309124Sdim{ 4335309124Sdim return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, 4336309124Sdim 2, 18, 3, 19, 4337309124Sdim 2+4, 18+4, 3+4, 19+4, 4338309124Sdim 2+8, 18+8, 3+8, 19+8, 4339309124Sdim 2+12, 18+12, 3+12, 19+12); 4340309124Sdim} 4341309124Sdim 4342309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4343309124Sdim_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 4344309124Sdim{ 4345309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, 4346309124Sdim (__v16si)_mm512_unpackhi_epi32(__A, __B), 4347309124Sdim (__v16si)__W); 4348309124Sdim} 4349309124Sdim 4350309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4351309124Sdim_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B) 
4352309124Sdim{ 4353309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, 4354309124Sdim (__v16si)_mm512_unpackhi_epi32(__A, __B), 4355309124Sdim (__v16si)_mm512_setzero_si512()); 4356309124Sdim} 4357309124Sdim 4358309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4359309124Sdim_mm512_unpacklo_epi32(__m512i __A, __m512i __B) 4360309124Sdim{ 4361309124Sdim return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, 4362309124Sdim 0, 16, 1, 17, 4363309124Sdim 0+4, 16+4, 1+4, 17+4, 4364309124Sdim 0+8, 16+8, 1+8, 17+8, 4365309124Sdim 0+12, 16+12, 1+12, 17+12); 4366309124Sdim} 4367309124Sdim 4368309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4369309124Sdim_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 4370309124Sdim{ 4371309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, 4372309124Sdim (__v16si)_mm512_unpacklo_epi32(__A, __B), 4373309124Sdim (__v16si)__W); 4374309124Sdim} 4375309124Sdim 4376309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4377309124Sdim_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B) 4378309124Sdim{ 4379309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, 4380309124Sdim (__v16si)_mm512_unpacklo_epi32(__A, __B), 4381309124Sdim (__v16si)_mm512_setzero_si512()); 4382309124Sdim} 4383309124Sdim 4384309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4385309124Sdim_mm512_unpackhi_epi64(__m512i __A, __m512i __B) 4386309124Sdim{ 4387309124Sdim return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, 4388309124Sdim 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); 4389309124Sdim} 4390309124Sdim 4391309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4392309124Sdim_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 4393309124Sdim{ 4394309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, 4395309124Sdim (__v8di)_mm512_unpackhi_epi64(__A, __B), 4396309124Sdim (__v8di)__W); 
4397309124Sdim} 4398309124Sdim 4399309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4400309124Sdim_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B) 4401309124Sdim{ 4402309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, 4403309124Sdim (__v8di)_mm512_unpackhi_epi64(__A, __B), 4404309124Sdim (__v8di)_mm512_setzero_si512()); 4405309124Sdim} 4406309124Sdim 4407309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4408309124Sdim_mm512_unpacklo_epi64 (__m512i __A, __m512i __B) 4409309124Sdim{ 4410309124Sdim return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, 4411309124Sdim 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); 4412309124Sdim} 4413309124Sdim 4414309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4415309124Sdim_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 4416309124Sdim{ 4417309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, 4418309124Sdim (__v8di)_mm512_unpacklo_epi64(__A, __B), 4419309124Sdim (__v8di)__W); 4420309124Sdim} 4421309124Sdim 4422309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 4423309124Sdim_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 4424309124Sdim{ 4425309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, 4426309124Sdim (__v8di)_mm512_unpacklo_epi64(__A, __B), 4427309124Sdim (__v8di)_mm512_setzero_si512()); 4428309124Sdim} 4429309124Sdim 4430277325Sdim/* Bit Test */ 4431277325Sdim 4432288943Sdimstatic __inline __mmask16 __DEFAULT_FN_ATTRS 4433277325Sdim_mm512_test_epi32_mask(__m512i __A, __m512i __B) 4434277325Sdim{ 4435277325Sdim return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, 4436277325Sdim (__v16si) __B, 4437277325Sdim (__mmask16) -1); 4438277325Sdim} 4439277325Sdim 4440309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4441309124Sdim_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 4442309124Sdim{ 4443309124Sdim return (__mmask16) __builtin_ia32_ptestmd512 
((__v16si) __A, 4444309124Sdim (__v16si) __B, __U); 4445309124Sdim} 4446309124Sdim 4447288943Sdimstatic __inline __mmask8 __DEFAULT_FN_ATTRS 4448277325Sdim_mm512_test_epi64_mask(__m512i __A, __m512i __B) 4449277325Sdim{ 4450277325Sdim return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, 4451277325Sdim (__v8di) __B, 4452277325Sdim (__mmask8) -1); 4453277325Sdim} 4454277325Sdim 4455309124Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4456309124Sdim_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 4457309124Sdim{ 4458309124Sdim return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U); 4459309124Sdim} 4460309124Sdim 4461309124Sdim 4462277325Sdim/* SIMD load ops */ 4463277325Sdim 4464288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4465309124Sdim_mm512_loadu_si512 (void const *__P) 4466309124Sdim{ 4467309124Sdim return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, 4468309124Sdim (__v16si) 4469309124Sdim _mm512_setzero_si512 (), 4470309124Sdim (__mmask16) -1); 4471309124Sdim} 4472309124Sdim 4473309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4474309124Sdim_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) 4475309124Sdim{ 4476309124Sdim return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, 4477309124Sdim (__v16si) __W, 4478309124Sdim (__mmask16) __U); 4479309124Sdim} 4480309124Sdim 4481309124Sdim 4482309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4483277325Sdim_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) 4484277325Sdim{ 4485309124Sdim return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P, 4486277325Sdim (__v16si) 4487277325Sdim _mm512_setzero_si512 (), 4488277325Sdim (__mmask16) __U); 4489277325Sdim} 4490277325Sdim 4491288943Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4492309124Sdim_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) 4493309124Sdim{ 4494309124Sdim return (__m512i) __builtin_ia32_loaddqudi512_mask 
((const long long *) __P, 4495309124Sdim (__v8di) __W, 4496309124Sdim (__mmask8) __U); 4497309124Sdim} 4498309124Sdim 4499309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4500277325Sdim_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) 4501277325Sdim{ 4502309124Sdim return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P, 4503277325Sdim (__v8di) 4504277325Sdim _mm512_setzero_si512 (), 4505277325Sdim (__mmask8) __U); 4506277325Sdim} 4507277325Sdim 4508288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 4509309124Sdim_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P) 4510309124Sdim{ 4511309124Sdim return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P, 4512309124Sdim (__v16sf) __W, 4513309124Sdim (__mmask16) __U); 4514309124Sdim} 4515309124Sdim 4516309124Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 4517277325Sdim_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) 4518277325Sdim{ 4519309124Sdim return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P, 4520277325Sdim (__v16sf) 4521277325Sdim _mm512_setzero_ps (), 4522277325Sdim (__mmask16) __U); 4523277325Sdim} 4524277325Sdim 4525288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 4526309124Sdim_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P) 4527277325Sdim{ 4528309124Sdim return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P, 4529309124Sdim (__v8df) __W, 4530309124Sdim (__mmask8) __U); 4531277325Sdim} 4532277325Sdim 4533288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 4534309124Sdim_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) 4535288943Sdim{ 4536309124Sdim return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P, 4537288943Sdim (__v8df) 4538288943Sdim _mm512_setzero_pd (), 4539288943Sdim (__mmask8) __U); 4540288943Sdim} 4541288943Sdim 4542288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 4543277325Sdim_mm512_loadu_pd(double const *__p) 4544277325Sdim{ 4545277325Sdim struct __loadu_pd { 
4546277325Sdim __m512d __v; 4547288943Sdim } __attribute__((__packed__, __may_alias__)); 4548277325Sdim return ((struct __loadu_pd*)__p)->__v; 4549277325Sdim} 4550277325Sdim 4551288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 4552277325Sdim_mm512_loadu_ps(float const *__p) 4553277325Sdim{ 4554277325Sdim struct __loadu_ps { 4555277325Sdim __m512 __v; 4556288943Sdim } __attribute__((__packed__, __may_alias__)); 4557277325Sdim return ((struct __loadu_ps*)__p)->__v; 4558277325Sdim} 4559277325Sdim 4560288943Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 4561309124Sdim_mm512_load_ps(float const *__p) 4562288943Sdim{ 4563288943Sdim return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p, 4564288943Sdim (__v16sf) 4565288943Sdim _mm512_setzero_ps (), 4566288943Sdim (__mmask16) -1); 4567288943Sdim} 4568288943Sdim 4569309124Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 4570309124Sdim_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P) 4571309124Sdim{ 4572309124Sdim return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, 4573309124Sdim (__v16sf) __W, 4574309124Sdim (__mmask16) __U); 4575309124Sdim} 4576309124Sdim 4577309124Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 4578309124Sdim_mm512_maskz_load_ps(__mmask16 __U, void const *__P) 4579309124Sdim{ 4580309124Sdim return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P, 4581309124Sdim (__v16sf) 4582309124Sdim _mm512_setzero_ps (), 4583309124Sdim (__mmask16) __U); 4584309124Sdim} 4585309124Sdim 4586288943Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 4587309124Sdim_mm512_load_pd(double const *__p) 4588288943Sdim{ 4589288943Sdim return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p, 4590288943Sdim (__v8df) 4591288943Sdim _mm512_setzero_pd (), 4592288943Sdim (__mmask8) -1); 4593288943Sdim} 4594288943Sdim 4595309124Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 4596309124Sdim_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P) 4597309124Sdim{ 
4598309124Sdim return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, 4599309124Sdim (__v8df) __W, 4600309124Sdim (__mmask8) __U); 4601309124Sdim} 4602309124Sdim 4603309124Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 4604309124Sdim_mm512_maskz_load_pd(__mmask8 __U, void const *__P) 4605309124Sdim{ 4606309124Sdim return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P, 4607309124Sdim (__v8df) 4608309124Sdim _mm512_setzero_pd (), 4609309124Sdim (__mmask8) __U); 4610309124Sdim} 4611309124Sdim 4612309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4613309124Sdim_mm512_load_si512 (void const *__P) 4614309124Sdim{ 4615309124Sdim return *(__m512i *) __P; 4616309124Sdim} 4617309124Sdim 4618309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4619309124Sdim_mm512_load_epi32 (void const *__P) 4620309124Sdim{ 4621309124Sdim return *(__m512i *) __P; 4622309124Sdim} 4623309124Sdim 4624309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 4625309124Sdim_mm512_load_epi64 (void const *__P) 4626309124Sdim{ 4627309124Sdim return *(__m512i *) __P; 4628309124Sdim} 4629309124Sdim 4630277325Sdim/* SIMD store ops */ 4631277325Sdim 4632288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4633277325Sdim_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) 4634277325Sdim{ 4635309124Sdim __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A, 4636277325Sdim (__mmask8) __U); 4637277325Sdim} 4638277325Sdim 4639288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4640309124Sdim_mm512_storeu_si512 (void *__P, __m512i __A) 4641309124Sdim{ 4642309124Sdim __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A, 4643309124Sdim (__mmask16) -1); 4644309124Sdim} 4645309124Sdim 4646309124Sdimstatic __inline void __DEFAULT_FN_ATTRS 4647277325Sdim_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) 4648277325Sdim{ 4649309124Sdim __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A, 4650277325Sdim (__mmask16) __U); 4651277325Sdim} 
4652277325Sdim 4653288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4654277325Sdim_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A) 4655277325Sdim{ 4656309124Sdim __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U); 4657277325Sdim} 4658277325Sdim 4659288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4660277325Sdim_mm512_storeu_pd(void *__P, __m512d __A) 4661277325Sdim{ 4662309124Sdim __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1); 4663277325Sdim} 4664277325Sdim 4665288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4666277325Sdim_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A) 4667277325Sdim{ 4668309124Sdim __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A, 4669277325Sdim (__mmask16) __U); 4670277325Sdim} 4671277325Sdim 4672288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4673277325Sdim_mm512_storeu_ps(void *__P, __m512 __A) 4674277325Sdim{ 4675309124Sdim __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1); 4676277325Sdim} 4677277325Sdim 4678288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4679288943Sdim_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) 4680277325Sdim{ 4681288943Sdim __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); 4682277325Sdim} 4683277325Sdim 4684288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4685277325Sdim_mm512_store_pd(void *__P, __m512d __A) 4686277325Sdim{ 4687277325Sdim *(__m512d*)__P = __A; 4688277325Sdim} 4689277325Sdim 4690288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4691288943Sdim_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A) 4692288943Sdim{ 4693288943Sdim __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A, 4694288943Sdim (__mmask16) __U); 4695288943Sdim} 4696288943Sdim 4697288943Sdimstatic __inline void __DEFAULT_FN_ATTRS 4698288943Sdim_mm512_store_ps(void *__P, __m512 __A) 4699288943Sdim{ 4700288943Sdim *(__m512*)__P = __A; 4701288943Sdim} 4702288943Sdim 
4703309124Sdimstatic __inline void __DEFAULT_FN_ATTRS 4704309124Sdim_mm512_store_si512 (void *__P, __m512i __A) 4705309124Sdim{ 4706309124Sdim *(__m512i *) __P = __A; 4707309124Sdim} 4708309124Sdim 4709309124Sdimstatic __inline void __DEFAULT_FN_ATTRS 4710309124Sdim_mm512_store_epi32 (void *__P, __m512i __A) 4711309124Sdim{ 4712309124Sdim *(__m512i *) __P = __A; 4713309124Sdim} 4714309124Sdim 4715309124Sdimstatic __inline void __DEFAULT_FN_ATTRS 4716309124Sdim_mm512_store_epi64 (void *__P, __m512i __A) 4717309124Sdim{ 4718309124Sdim *(__m512i *) __P = __A; 4719309124Sdim} 4720309124Sdim 4721277325Sdim/* Mask ops */ 4722277325Sdim 4723288943Sdimstatic __inline __mmask16 __DEFAULT_FN_ATTRS 4724277325Sdim_mm512_knot(__mmask16 __M) 4725277325Sdim{ 4726277325Sdim return __builtin_ia32_knothi(__M); 4727277325Sdim} 4728277325Sdim 4729277325Sdim/* Integer compare */ 4730277325Sdim 4731288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4732277325Sdim_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) { 4733277325Sdim return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, 4734277325Sdim (__mmask16)-1); 4735277325Sdim} 4736277325Sdim 4737288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4738277325Sdim_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4739277325Sdim return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, 4740277325Sdim __u); 4741277325Sdim} 4742277325Sdim 4743288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4744288943Sdim_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) { 4745288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, 4746288943Sdim (__mmask16)-1); 4747288943Sdim} 4748288943Sdim 4749288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4750288943Sdim_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4751288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, 4752288943Sdim 
__u); 4753288943Sdim} 4754288943Sdim 4755288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4756277325Sdim_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4757277325Sdim return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, 4758277325Sdim __u); 4759277325Sdim} 4760277325Sdim 4761288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4762277325Sdim_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) { 4763277325Sdim return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, 4764277325Sdim (__mmask8)-1); 4765277325Sdim} 4766277325Sdim 4767288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4768288943Sdim_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) { 4769288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, 4770288943Sdim (__mmask8)-1); 4771288943Sdim} 4772288943Sdim 4773288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4774288943Sdim_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4775288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, 4776288943Sdim __u); 4777288943Sdim} 4778288943Sdim 4779288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4780288943Sdim_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) { 4781288943Sdim return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, 4782288943Sdim (__mmask16)-1); 4783288943Sdim} 4784288943Sdim 4785288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4786288943Sdim_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4787288943Sdim return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, 4788288943Sdim __u); 4789288943Sdim} 4790288943Sdim 4791288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4792288943Sdim_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) { 4793288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, 4794288943Sdim (__mmask16)-1); 
4795288943Sdim} 4796288943Sdim 4797288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4798288943Sdim_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4799288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, 4800288943Sdim __u); 4801288943Sdim} 4802288943Sdim 4803288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4804288943Sdim_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) { 4805288943Sdim return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, 4806288943Sdim (__mmask8)-1); 4807288943Sdim} 4808288943Sdim 4809288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4810288943Sdim_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4811288943Sdim return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, 4812288943Sdim __u); 4813288943Sdim} 4814288943Sdim 4815288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4816288943Sdim_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) { 4817288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, 4818288943Sdim (__mmask8)-1); 4819288943Sdim} 4820288943Sdim 4821288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4822288943Sdim_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4823288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, 4824288943Sdim __u); 4825288943Sdim} 4826288943Sdim 4827288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4828288943Sdim_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) { 4829288943Sdim return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, 4830288943Sdim (__mmask16)-1); 4831288943Sdim} 4832288943Sdim 4833288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4834288943Sdim_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4835288943Sdim return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, 4836288943Sdim __u); 
4837288943Sdim} 4838288943Sdim 4839288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4840288943Sdim_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) { 4841288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, 4842288943Sdim (__mmask16)-1); 4843288943Sdim} 4844288943Sdim 4845288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4846288943Sdim_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4847288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, 4848288943Sdim __u); 4849288943Sdim} 4850288943Sdim 4851288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4852288943Sdim_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4853288943Sdim return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b, 4854288943Sdim __u); 4855288943Sdim} 4856288943Sdim 4857288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4858288943Sdim_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) { 4859288943Sdim return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b, 4860288943Sdim (__mmask8)-1); 4861288943Sdim} 4862288943Sdim 4863288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4864288943Sdim_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) { 4865288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, 4866288943Sdim (__mmask8)-1); 4867288943Sdim} 4868288943Sdim 4869288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4870288943Sdim_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4871288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, 4872288943Sdim __u); 4873288943Sdim} 4874288943Sdim 4875288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4876288943Sdim_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) { 4877288943Sdim return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, 4878288943Sdim (__mmask16)-1); 4879288943Sdim} 
4880288943Sdim 4881288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4882288943Sdim_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4883288943Sdim return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, 4884288943Sdim __u); 4885288943Sdim} 4886288943Sdim 4887288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4888288943Sdim_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) { 4889288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, 4890288943Sdim (__mmask16)-1); 4891288943Sdim} 4892288943Sdim 4893288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4894288943Sdim_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4895288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, 4896288943Sdim __u); 4897288943Sdim} 4898288943Sdim 4899288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4900288943Sdim_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) { 4901288943Sdim return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, 4902288943Sdim (__mmask8)-1); 4903288943Sdim} 4904288943Sdim 4905288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4906288943Sdim_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4907288943Sdim return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, 4908288943Sdim __u); 4909288943Sdim} 4910288943Sdim 4911288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4912288943Sdim_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) { 4913288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, 4914288943Sdim (__mmask8)-1); 4915288943Sdim} 4916288943Sdim 4917288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4918288943Sdim_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4919288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, 4920288943Sdim __u); 4921288943Sdim} 
4922288943Sdim 4923288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4924288943Sdim_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) { 4925288943Sdim return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, 4926288943Sdim (__mmask16)-1); 4927288943Sdim} 4928288943Sdim 4929288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4930288943Sdim_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4931288943Sdim return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, 4932288943Sdim __u); 4933288943Sdim} 4934288943Sdim 4935288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4936288943Sdim_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) { 4937288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, 4938288943Sdim (__mmask16)-1); 4939288943Sdim} 4940288943Sdim 4941288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4942288943Sdim_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4943288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, 4944288943Sdim __u); 4945288943Sdim} 4946288943Sdim 4947288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4948288943Sdim_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) { 4949288943Sdim return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, 4950288943Sdim (__mmask8)-1); 4951288943Sdim} 4952288943Sdim 4953288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4954288943Sdim_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4955288943Sdim return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, 4956288943Sdim __u); 4957288943Sdim} 4958288943Sdim 4959288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4960288943Sdim_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) { 4961288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, 4962288943Sdim (__mmask8)-1); 4963288943Sdim} 4964288943Sdim 
4965288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4966288943Sdim_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4967288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, 4968288943Sdim __u); 4969288943Sdim} 4970288943Sdim 4971288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4972288943Sdim_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) { 4973288943Sdim return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, 4974288943Sdim (__mmask16)-1); 4975288943Sdim} 4976288943Sdim 4977288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4978288943Sdim_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4979288943Sdim return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, 4980288943Sdim __u); 4981288943Sdim} 4982288943Sdim 4983288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4984288943Sdim_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) { 4985288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, 4986288943Sdim (__mmask16)-1); 4987288943Sdim} 4988288943Sdim 4989288943Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 4990288943Sdim_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4991288943Sdim return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, 4992288943Sdim __u); 4993288943Sdim} 4994288943Sdim 4995288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 4996288943Sdim_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) { 4997288943Sdim return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, 4998288943Sdim (__mmask8)-1); 4999288943Sdim} 5000288943Sdim 5001288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 5002288943Sdim_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 5003288943Sdim return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, 5004288943Sdim __u); 5005288943Sdim} 
5006288943Sdim 5007288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 5008288943Sdim_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) { 5009288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, 5010288943Sdim (__mmask8)-1); 5011288943Sdim} 5012288943Sdim 5013288943Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 5014288943Sdim_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 5015288943Sdim return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, 5016288943Sdim __u); 5017288943Sdim} 5018288943Sdim 5019309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5020314564Sdim_mm512_cvtepi8_epi32(__m128i __A) 5021309124Sdim{ 5022314564Sdim /* This function always performs a signed extension, but __v16qi is a char 5023314564Sdim which may be signed or unsigned, so use __v16qs. */ 5024314564Sdim return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si); 5025309124Sdim} 5026309124Sdim 5027309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5028314564Sdim_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) 5029309124Sdim{ 5030314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5031314564Sdim (__v16si)_mm512_cvtepi8_epi32(__A), 5032314564Sdim (__v16si)__W); 5033309124Sdim} 5034309124Sdim 5035309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5036314564Sdim_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A) 5037309124Sdim{ 5038314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5039314564Sdim (__v16si)_mm512_cvtepi8_epi32(__A), 5040314564Sdim (__v16si)_mm512_setzero_si512()); 5041309124Sdim} 5042309124Sdim 5043309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5044314564Sdim_mm512_cvtepi8_epi64(__m128i __A) 5045309124Sdim{ 5046314564Sdim /* This function always performs a signed extension, but __v16qi is a char 5047314564Sdim which may be signed or unsigned, so use __v16qs. 
*/ 5048314564Sdim return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); 5049309124Sdim} 5050309124Sdim 5051309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5052314564Sdim_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) 5053309124Sdim{ 5054314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5055314564Sdim (__v8di)_mm512_cvtepi8_epi64(__A), 5056314564Sdim (__v8di)__W); 5057309124Sdim} 5058309124Sdim 5059309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5060314564Sdim_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 5061309124Sdim{ 5062314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5063314564Sdim (__v8di)_mm512_cvtepi8_epi64(__A), 5064314564Sdim (__v8di)_mm512_setzero_si512 ()); 5065309124Sdim} 5066309124Sdim 5067309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5068314564Sdim_mm512_cvtepi32_epi64(__m256i __X) 5069309124Sdim{ 5070314564Sdim return (__m512i)__builtin_convertvector((__v8si)__X, __v8di); 5071309124Sdim} 5072309124Sdim 5073309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5074314564Sdim_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) 5075309124Sdim{ 5076314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5077314564Sdim (__v8di)_mm512_cvtepi32_epi64(__X), 5078314564Sdim (__v8di)__W); 5079309124Sdim} 5080309124Sdim 5081309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5082314564Sdim_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X) 5083309124Sdim{ 5084314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5085314564Sdim (__v8di)_mm512_cvtepi32_epi64(__X), 5086314564Sdim (__v8di)_mm512_setzero_si512()); 5087309124Sdim} 5088309124Sdim 5089309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5090314564Sdim_mm512_cvtepi16_epi32(__m256i __A) 5091309124Sdim{ 5092314564Sdim return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si); 5093309124Sdim} 
5094309124Sdim 5095309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5096314564Sdim_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) 5097309124Sdim{ 5098314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5099314564Sdim (__v16si)_mm512_cvtepi16_epi32(__A), 5100314564Sdim (__v16si)__W); 5101309124Sdim} 5102309124Sdim 5103309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5104314564Sdim_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A) 5105309124Sdim{ 5106314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5107314564Sdim (__v16si)_mm512_cvtepi16_epi32(__A), 5108314564Sdim (__v16si)_mm512_setzero_si512 ()); 5109309124Sdim} 5110309124Sdim 5111309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5112314564Sdim_mm512_cvtepi16_epi64(__m128i __A) 5113309124Sdim{ 5114314564Sdim return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di); 5115309124Sdim} 5116309124Sdim 5117309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5118314564Sdim_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) 5119309124Sdim{ 5120314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5121314564Sdim (__v8di)_mm512_cvtepi16_epi64(__A), 5122314564Sdim (__v8di)__W); 5123309124Sdim} 5124309124Sdim 5125309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5126314564Sdim_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 5127309124Sdim{ 5128314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5129314564Sdim (__v8di)_mm512_cvtepi16_epi64(__A), 5130314564Sdim (__v8di)_mm512_setzero_si512()); 5131309124Sdim} 5132309124Sdim 5133309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5134314564Sdim_mm512_cvtepu8_epi32(__m128i __A) 5135309124Sdim{ 5136314564Sdim return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si); 5137309124Sdim} 5138309124Sdim 5139309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5140314564Sdim_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i 
__A) 5141309124Sdim{ 5142314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5143314564Sdim (__v16si)_mm512_cvtepu8_epi32(__A), 5144314564Sdim (__v16si)__W); 5145309124Sdim} 5146309124Sdim 5147309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5148314564Sdim_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A) 5149309124Sdim{ 5150314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5151314564Sdim (__v16si)_mm512_cvtepu8_epi32(__A), 5152314564Sdim (__v16si)_mm512_setzero_si512()); 5153309124Sdim} 5154309124Sdim 5155309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5156314564Sdim_mm512_cvtepu8_epi64(__m128i __A) 5157309124Sdim{ 5158314564Sdim return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); 5159309124Sdim} 5160309124Sdim 5161309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5162314564Sdim_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) 5163309124Sdim{ 5164314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5165314564Sdim (__v8di)_mm512_cvtepu8_epi64(__A), 5166314564Sdim (__v8di)__W); 5167309124Sdim} 5168309124Sdim 5169309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5170314564Sdim_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) 5171309124Sdim{ 5172314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5173314564Sdim (__v8di)_mm512_cvtepu8_epi64(__A), 5174314564Sdim (__v8di)_mm512_setzero_si512()); 5175309124Sdim} 5176309124Sdim 5177309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5178314564Sdim_mm512_cvtepu32_epi64(__m256i __X) 5179309124Sdim{ 5180314564Sdim return (__m512i)__builtin_convertvector((__v8su)__X, __v8di); 5181309124Sdim} 5182309124Sdim 5183309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5184314564Sdim_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) 5185309124Sdim{ 5186314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 
5187314564Sdim (__v8di)_mm512_cvtepu32_epi64(__X), 5188314564Sdim (__v8di)__W); 5189309124Sdim} 5190309124Sdim 5191309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5192314564Sdim_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X) 5193309124Sdim{ 5194314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5195314564Sdim (__v8di)_mm512_cvtepu32_epi64(__X), 5196314564Sdim (__v8di)_mm512_setzero_si512()); 5197309124Sdim} 5198309124Sdim 5199309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5200314564Sdim_mm512_cvtepu16_epi32(__m256i __A) 5201309124Sdim{ 5202314564Sdim return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si); 5203309124Sdim} 5204309124Sdim 5205309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5206314564Sdim_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) 5207309124Sdim{ 5208314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5209314564Sdim (__v16si)_mm512_cvtepu16_epi32(__A), 5210314564Sdim (__v16si)__W); 5211309124Sdim} 5212309124Sdim 5213309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5214314564Sdim_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A) 5215309124Sdim{ 5216314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5217314564Sdim (__v16si)_mm512_cvtepu16_epi32(__A), 5218314564Sdim (__v16si)_mm512_setzero_si512()); 5219309124Sdim} 5220309124Sdim 5221309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5222314564Sdim_mm512_cvtepu16_epi64(__m128i __A) 5223309124Sdim{ 5224314564Sdim return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di); 5225309124Sdim} 5226309124Sdim 5227309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5228314564Sdim_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) 5229309124Sdim{ 5230314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5231314564Sdim (__v8di)_mm512_cvtepu16_epi64(__A), 5232314564Sdim (__v8di)__W); 5233309124Sdim} 5234309124Sdim 5235309124Sdimstatic __inline__ __m512i 
__DEFAULT_FN_ATTRS 5236314564Sdim_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 5237309124Sdim{ 5238314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5239314564Sdim (__v8di)_mm512_cvtepu16_epi64(__A), 5240314564Sdim (__v8di)_mm512_setzero_si512()); 5241309124Sdim} 5242309124Sdim 5243309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5244309124Sdim_mm512_rorv_epi32 (__m512i __A, __m512i __B) 5245309124Sdim{ 5246309124Sdim return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, 5247309124Sdim (__v16si) __B, 5248309124Sdim (__v16si) 5249309124Sdim _mm512_setzero_si512 (), 5250309124Sdim (__mmask16) -1); 5251309124Sdim} 5252309124Sdim 5253309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5254309124Sdim_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 5255309124Sdim{ 5256309124Sdim return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, 5257309124Sdim (__v16si) __B, 5258309124Sdim (__v16si) __W, 5259309124Sdim (__mmask16) __U); 5260309124Sdim} 5261309124Sdim 5262309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5263309124Sdim_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 5264309124Sdim{ 5265309124Sdim return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, 5266309124Sdim (__v16si) __B, 5267309124Sdim (__v16si) 5268309124Sdim _mm512_setzero_si512 (), 5269309124Sdim (__mmask16) __U); 5270309124Sdim} 5271309124Sdim 5272309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5273309124Sdim_mm512_rorv_epi64 (__m512i __A, __m512i __B) 5274309124Sdim{ 5275309124Sdim return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, 5276309124Sdim (__v8di) __B, 5277309124Sdim (__v8di) 5278309124Sdim _mm512_setzero_si512 (), 5279309124Sdim (__mmask8) -1); 5280309124Sdim} 5281309124Sdim 5282309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5283309124Sdim_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 5284309124Sdim{ 5285309124Sdim return 
(__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, 5286309124Sdim (__v8di) __B, 5287309124Sdim (__v8di) __W, 5288309124Sdim (__mmask8) __U); 5289309124Sdim} 5290309124Sdim 5291309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5292309124Sdim_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 5293309124Sdim{ 5294309124Sdim return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, 5295309124Sdim (__v8di) __B, 5296309124Sdim (__v8di) 5297309124Sdim _mm512_setzero_si512 (), 5298309124Sdim (__mmask8) __U); 5299309124Sdim} 5300309124Sdim 5301309124Sdim 5302309124Sdim 5303288943Sdim#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \ 5304296417Sdim (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 5305309124Sdim (__v16si)(__m512i)(b), (int)(p), \ 5306288943Sdim (__mmask16)-1); }) 5307288943Sdim 5308288943Sdim#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \ 5309296417Sdim (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 5310309124Sdim (__v16si)(__m512i)(b), (int)(p), \ 5311288943Sdim (__mmask16)-1); }) 5312288943Sdim 5313288943Sdim#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \ 5314296417Sdim (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 5315309124Sdim (__v8di)(__m512i)(b), (int)(p), \ 5316288943Sdim (__mmask8)-1); }) 5317288943Sdim 5318288943Sdim#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \ 5319296417Sdim (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 5320309124Sdim (__v8di)(__m512i)(b), (int)(p), \ 5321288943Sdim (__mmask8)-1); }) 5322288943Sdim 5323288943Sdim#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ 5324296417Sdim (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 5325309124Sdim (__v16si)(__m512i)(b), (int)(p), \ 5326288943Sdim (__mmask16)(m)); }) 5327288943Sdim 5328288943Sdim#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ 5329296417Sdim 
(__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 5330309124Sdim (__v16si)(__m512i)(b), (int)(p), \ 5331288943Sdim (__mmask16)(m)); }) 5332288943Sdim 5333288943Sdim#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ 5334296417Sdim (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 5335309124Sdim (__v8di)(__m512i)(b), (int)(p), \ 5336288943Sdim (__mmask8)(m)); }) 5337288943Sdim 5338288943Sdim#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ 5339296417Sdim (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 5340309124Sdim (__v8di)(__m512i)(b), (int)(p), \ 5341288943Sdim (__mmask8)(m)); }) 5342288943Sdim 5343309124Sdim#define _mm512_rol_epi32(a, b) __extension__ ({ \ 5344309124Sdim (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ 5345309124Sdim (__v16si)_mm512_setzero_si512(), \ 5346309124Sdim (__mmask16)-1); }) 5347309124Sdim 5348309124Sdim#define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \ 5349309124Sdim (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ 5350309124Sdim (__v16si)(__m512i)(W), \ 5351309124Sdim (__mmask16)(U)); }) 5352309124Sdim 5353309124Sdim#define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \ 5354309124Sdim (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ 5355309124Sdim (__v16si)_mm512_setzero_si512(), \ 5356309124Sdim (__mmask16)(U)); }) 5357309124Sdim 5358309124Sdim#define _mm512_rol_epi64(a, b) __extension__ ({ \ 5359309124Sdim (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ 5360309124Sdim (__v8di)_mm512_setzero_si512(), \ 5361309124Sdim (__mmask8)-1); }) 5362309124Sdim 5363309124Sdim#define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \ 5364309124Sdim (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ 5365309124Sdim (__v8di)(__m512i)(W), (__mmask8)(U)); }) 5366309124Sdim 5367309124Sdim#define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \ 
5368309124Sdim (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ 5369309124Sdim (__v8di)_mm512_setzero_si512(), \ 5370309124Sdim (__mmask8)(U)); }) 5371309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5372309124Sdim_mm512_rolv_epi32 (__m512i __A, __m512i __B) 5373309124Sdim{ 5374309124Sdim return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, 5375309124Sdim (__v16si) __B, 5376309124Sdim (__v16si) 5377309124Sdim _mm512_setzero_si512 (), 5378309124Sdim (__mmask16) -1); 5379309124Sdim} 5380309124Sdim 5381309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5382309124Sdim_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 5383309124Sdim{ 5384309124Sdim return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, 5385309124Sdim (__v16si) __B, 5386309124Sdim (__v16si) __W, 5387309124Sdim (__mmask16) __U); 5388309124Sdim} 5389309124Sdim 5390309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5391309124Sdim_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 5392309124Sdim{ 5393309124Sdim return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, 5394309124Sdim (__v16si) __B, 5395309124Sdim (__v16si) 5396309124Sdim _mm512_setzero_si512 (), 5397309124Sdim (__mmask16) __U); 5398309124Sdim} 5399309124Sdim 5400309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5401309124Sdim_mm512_rolv_epi64 (__m512i __A, __m512i __B) 5402309124Sdim{ 5403309124Sdim return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, 5404309124Sdim (__v8di) __B, 5405309124Sdim (__v8di) 5406309124Sdim _mm512_setzero_si512 (), 5407309124Sdim (__mmask8) -1); 5408309124Sdim} 5409309124Sdim 5410309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5411309124Sdim_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 5412309124Sdim{ 5413309124Sdim return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, 5414309124Sdim (__v8di) __B, 5415309124Sdim (__v8di) __W, 5416309124Sdim (__mmask8) __U); 
5417309124Sdim} 5418309124Sdim 5419309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5420309124Sdim_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 5421309124Sdim{ 5422309124Sdim return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, 5423309124Sdim (__v8di) __B, 5424309124Sdim (__v8di) 5425309124Sdim _mm512_setzero_si512 (), 5426309124Sdim (__mmask8) __U); 5427309124Sdim} 5428309124Sdim 5429309124Sdim#define _mm512_ror_epi32(A, B) __extension__ ({ \ 5430309124Sdim (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ 5431309124Sdim (__v16si)_mm512_setzero_si512(), \ 5432309124Sdim (__mmask16)-1); }) 5433309124Sdim 5434309124Sdim#define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \ 5435309124Sdim (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ 5436309124Sdim (__v16si)(__m512i)(W), \ 5437309124Sdim (__mmask16)(U)); }) 5438309124Sdim 5439309124Sdim#define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \ 5440309124Sdim (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ 5441309124Sdim (__v16si)_mm512_setzero_si512(), \ 5442309124Sdim (__mmask16)(U)); }) 5443309124Sdim 5444309124Sdim#define _mm512_ror_epi64(A, B) __extension__ ({ \ 5445309124Sdim (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ 5446309124Sdim (__v8di)_mm512_setzero_si512(), \ 5447309124Sdim (__mmask8)-1); }) 5448309124Sdim 5449309124Sdim#define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \ 5450309124Sdim (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ 5451309124Sdim (__v8di)(__m512i)(W), (__mmask8)(U)); }) 5452309124Sdim 5453309124Sdim#define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \ 5454309124Sdim (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ 5455309124Sdim (__v8di)_mm512_setzero_si512(), \ 5456309124Sdim (__mmask8)(U)); }) 5457309124Sdim 5458314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 
5459314564Sdim_mm512_slli_epi32(__m512i __A, int __B) 5460314564Sdim{ 5461314564Sdim return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B); 5462314564Sdim} 5463309124Sdim 5464314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5465314564Sdim_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) 5466314564Sdim{ 5467314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5468314564Sdim (__v16si)_mm512_slli_epi32(__A, __B), 5469314564Sdim (__v16si)__W); 5470314564Sdim} 5471309124Sdim 5472314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5473314564Sdim_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) { 5474314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5475314564Sdim (__v16si)_mm512_slli_epi32(__A, __B), 5476314564Sdim (__v16si)_mm512_setzero_si512()); 5477314564Sdim} 5478309124Sdim 5479314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5480314564Sdim_mm512_slli_epi64(__m512i __A, int __B) 5481314564Sdim{ 5482314564Sdim return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B); 5483314564Sdim} 5484309124Sdim 5485314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5486314564Sdim_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) 5487314564Sdim{ 5488314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5489314564Sdim (__v8di)_mm512_slli_epi64(__A, __B), 5490314564Sdim (__v8di)__W); 5491314564Sdim} 5492309124Sdim 5493314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5494314564Sdim_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B) 5495314564Sdim{ 5496314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5497314564Sdim (__v8di)_mm512_slli_epi64(__A, __B), 5498314564Sdim (__v8di)_mm512_setzero_si512()); 5499314564Sdim} 5500309124Sdim 5501314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5502314564Sdim_mm512_srli_epi32(__m512i __A, int __B) 5503314564Sdim{ 5504314564Sdim return 
(__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B); 5505314564Sdim} 5506309124Sdim 5507314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5508314564Sdim_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) 5509314564Sdim{ 5510314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5511314564Sdim (__v16si)_mm512_srli_epi32(__A, __B), 5512314564Sdim (__v16si)__W); 5513314564Sdim} 5514309124Sdim 5515314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5516314564Sdim_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) { 5517314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5518314564Sdim (__v16si)_mm512_srli_epi32(__A, __B), 5519314564Sdim (__v16si)_mm512_setzero_si512()); 5520314564Sdim} 5521309124Sdim 5522314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5523314564Sdim_mm512_srli_epi64(__m512i __A, int __B) 5524314564Sdim{ 5525314564Sdim return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B); 5526314564Sdim} 5527309124Sdim 5528314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5529314564Sdim_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) 5530314564Sdim{ 5531314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5532314564Sdim (__v8di)_mm512_srli_epi64(__A, __B), 5533314564Sdim (__v8di)__W); 5534314564Sdim} 5535309124Sdim 5536314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5537314564Sdim_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B) 5538314564Sdim{ 5539314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5540314564Sdim (__v8di)_mm512_srli_epi64(__A, __B), 5541314564Sdim (__v8di)_mm512_setzero_si512()); 5542314564Sdim} 5543309124Sdim 5544309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5545309124Sdim_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) 5546309124Sdim{ 5547309124Sdim return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 5548309124Sdim (__v16si) __W, 
5549309124Sdim (__mmask16) __U); 5550309124Sdim} 5551309124Sdim 5552309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5553309124Sdim_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P) 5554309124Sdim{ 5555309124Sdim return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 5556309124Sdim (__v16si) 5557309124Sdim _mm512_setzero_si512 (), 5558309124Sdim (__mmask16) __U); 5559309124Sdim} 5560309124Sdim 5561309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 5562309124Sdim_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A) 5563309124Sdim{ 5564309124Sdim __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A, 5565309124Sdim (__mmask16) __U); 5566309124Sdim} 5567309124Sdim 5568309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5569309124Sdim_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 5570309124Sdim{ 5571309124Sdim return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, 5572309124Sdim (__v16si) __A, 5573309124Sdim (__v16si) __W); 5574309124Sdim} 5575309124Sdim 5576309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5577309124Sdim_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) 5578309124Sdim{ 5579309124Sdim return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, 5580309124Sdim (__v16si) __A, 5581309124Sdim (__v16si) _mm512_setzero_si512 ()); 5582309124Sdim} 5583309124Sdim 5584309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5585309124Sdim_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 5586309124Sdim{ 5587309124Sdim return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, 5588309124Sdim (__v8di) __A, 5589309124Sdim (__v8di) __W); 5590309124Sdim} 5591309124Sdim 5592309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5593309124Sdim_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) 5594309124Sdim{ 5595309124Sdim return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, 5596309124Sdim (__v8di) __A, 5597309124Sdim (__v8di) _mm512_setzero_si512 ()); 
5598309124Sdim} 5599309124Sdim 5600309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5601309124Sdim_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P) 5602309124Sdim{ 5603309124Sdim return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, 5604309124Sdim (__v8di) __W, 5605309124Sdim (__mmask8) __U); 5606309124Sdim} 5607309124Sdim 5608309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 5609309124Sdim_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P) 5610309124Sdim{ 5611309124Sdim return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, 5612309124Sdim (__v8di) 5613309124Sdim _mm512_setzero_si512 (), 5614309124Sdim (__mmask8) __U); 5615309124Sdim} 5616309124Sdim 5617309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 5618309124Sdim_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) 5619309124Sdim{ 5620309124Sdim __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A, 5621309124Sdim (__mmask8) __U); 5622309124Sdim} 5623309124Sdim 5624309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 5625309124Sdim_mm512_movedup_pd (__m512d __A) 5626309124Sdim{ 5627309124Sdim return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A, 5628309124Sdim 0, 0, 2, 2, 4, 4, 6, 6); 5629309124Sdim} 5630309124Sdim 5631309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 5632309124Sdim_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A) 5633309124Sdim{ 5634309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 5635309124Sdim (__v8df)_mm512_movedup_pd(__A), 5636309124Sdim (__v8df)__W); 5637309124Sdim} 5638309124Sdim 5639309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 5640309124Sdim_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) 5641309124Sdim{ 5642309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 5643309124Sdim (__v8df)_mm512_movedup_pd(__A), 5644309124Sdim (__v8df)_mm512_setzero_pd()); 5645309124Sdim} 5646309124Sdim 5647309124Sdim#define 
_mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \ 5648309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5649309124Sdim (__v8df)(__m512d)(B), \ 5650309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 5651309124Sdim (__mmask8)-1, (int)(R)); }) 5652309124Sdim 5653309124Sdim#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \ 5654309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5655309124Sdim (__v8df)(__m512d)(B), \ 5656309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 5657309124Sdim (__mmask8)(U), (int)(R)); }) 5658309124Sdim 5659309124Sdim#define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \ 5660309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5661309124Sdim (__v8df)(__m512d)(B), \ 5662309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 5663309124Sdim (__mmask8)-1, \ 5664309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5665309124Sdim 5666309124Sdim#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ 5667309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5668309124Sdim (__v8df)(__m512d)(B), \ 5669309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 5670309124Sdim (__mmask8)(U), \ 5671309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5672309124Sdim 5673309124Sdim#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \ 5674309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 5675309124Sdim (__v8df)(__m512d)(B), \ 5676309124Sdim (__v8di)(__m512i)(C), \ 5677309124Sdim (int)(imm), (__mmask8)(U), \ 5678309124Sdim (int)(R)); }) 5679309124Sdim 5680309124Sdim#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ 5681309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 5682309124Sdim (__v8df)(__m512d)(B), \ 5683309124Sdim (__v8di)(__m512i)(C), \ 5684309124Sdim (int)(imm), (__mmask8)(U), \ 5685309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5686309124Sdim 
/* fixupimm (packed single): macro wrappers over
 * __builtin_ia32_fixupimmps512_mask and __builtin_ia32_fixupimmps512_maskz.
 * A and B are cast to __v16sf, C to __v16si, imm to int.  Unmasked forms pass
 * an all-ones __mmask16; forms without "_round" pass
 * _MM_FROUND_CUR_DIRECTION as the final argument, "_round" forms pass
 * (int)(R).
 * Parameter order differs between flavours: _mask_ macros take the mask as
 * their second parameter, _maskz_ macros take it first. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), (int)(imm), \
      (__mmask16)-1, (int)(R)); })

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), (int)(imm), \
      (__mmask16)(U), (int)(R)); })

#define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), (int)(imm), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), (int)(imm), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U), \
      (int)(R)); })

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })
5726309124Sdim 5727309124Sdim#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \ 5728309124Sdim (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5729309124Sdim (__v2df)(__m128d)(B), \ 5730309124Sdim (__v2di)(__m128i)(C), (int)(imm), \ 5731309124Sdim (__mmask8)-1, (int)(R)); }) 5732309124Sdim 5733309124Sdim#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \ 5734309124Sdim (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5735309124Sdim (__v2df)(__m128d)(B), \ 5736309124Sdim (__v2di)(__m128i)(C), (int)(imm), \ 5737309124Sdim (__mmask8)(U), (int)(R)); }) 5738309124Sdim 5739309124Sdim#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \ 5740309124Sdim (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5741309124Sdim (__v2df)(__m128d)(B), \ 5742309124Sdim (__v2di)(__m128i)(C), (int)(imm), \ 5743309124Sdim (__mmask8)-1, \ 5744309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5745309124Sdim 5746309124Sdim#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \ 5747309124Sdim (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5748309124Sdim (__v2df)(__m128d)(B), \ 5749309124Sdim (__v2di)(__m128i)(C), (int)(imm), \ 5750309124Sdim (__mmask8)(U), \ 5751309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5752309124Sdim 5753309124Sdim#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \ 5754309124Sdim (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 5755309124Sdim (__v2df)(__m128d)(B), \ 5756309124Sdim (__v2di)(__m128i)(C), (int)(imm), \ 5757309124Sdim (__mmask8)(U), (int)(R)); }) 5758309124Sdim 5759309124Sdim#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \ 5760309124Sdim (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 5761309124Sdim (__v2df)(__m128d)(B), \ 5762309124Sdim (__v2di)(__m128i)(C), (int)(imm), \ 5763309124Sdim (__mmask8)(U), \ 5764309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5765309124Sdim 5766309124Sdim#define 
_mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \ 5767309124Sdim (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5768309124Sdim (__v4sf)(__m128)(B), \ 5769309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5770309124Sdim (__mmask8)-1, (int)(R)); }) 5771309124Sdim 5772309124Sdim#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \ 5773309124Sdim (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5774309124Sdim (__v4sf)(__m128)(B), \ 5775309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5776309124Sdim (__mmask8)(U), (int)(R)); }) 5777309124Sdim 5778309124Sdim#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \ 5779309124Sdim (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5780309124Sdim (__v4sf)(__m128)(B), \ 5781309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5782309124Sdim (__mmask8)-1, \ 5783309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5784309124Sdim 5785309124Sdim#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \ 5786309124Sdim (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5787309124Sdim (__v4sf)(__m128)(B), \ 5788309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5789309124Sdim (__mmask8)(U), \ 5790309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5791309124Sdim 5792309124Sdim#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \ 5793309124Sdim (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 5794309124Sdim (__v4sf)(__m128)(B), \ 5795309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5796309124Sdim (__mmask8)(U), (int)(R)); }) 5797309124Sdim 5798309124Sdim#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \ 5799309124Sdim (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 5800309124Sdim (__v4sf)(__m128)(B), \ 5801309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5802309124Sdim (__mmask8)(U), \ 5803309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5804309124Sdim 5805309124Sdim#define _mm_getexp_round_sd(A, B, R) __extension__ ({ \ 5806309124Sdim 
(__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5807309124Sdim (__v2df)(__m128d)(B), \ 5808309124Sdim (__v2df)_mm_setzero_pd(), \ 5809309124Sdim (__mmask8)-1, (int)(R)); }) 5810309124Sdim 5811309124Sdim 5812309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 5813309124Sdim_mm_getexp_sd (__m128d __A, __m128d __B) 5814309124Sdim{ 5815309124Sdim return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A, 5816309124Sdim (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); 5817309124Sdim} 5818309124Sdim 5819309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 5820309124Sdim_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 5821309124Sdim{ 5822309124Sdim return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, 5823309124Sdim (__v2df) __B, 5824309124Sdim (__v2df) __W, 5825309124Sdim (__mmask8) __U, 5826309124Sdim _MM_FROUND_CUR_DIRECTION); 5827309124Sdim} 5828309124Sdim 5829309124Sdim#define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\ 5830309124Sdim (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5831309124Sdim (__v2df)(__m128d)(B), \ 5832309124Sdim (__v2df)(__m128d)(W), \ 5833309124Sdim (__mmask8)(U), (int)(R)); }) 5834309124Sdim 5835309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 5836309124Sdim_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) 5837309124Sdim{ 5838309124Sdim return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, 5839309124Sdim (__v2df) __B, 5840309124Sdim (__v2df) _mm_setzero_pd (), 5841309124Sdim (__mmask8) __U, 5842309124Sdim _MM_FROUND_CUR_DIRECTION); 5843309124Sdim} 5844309124Sdim 5845309124Sdim#define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\ 5846309124Sdim (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5847309124Sdim (__v2df)(__m128d)(B), \ 5848309124Sdim (__v2df)_mm_setzero_pd(), \ 5849309124Sdim (__mmask8)(U), (int)(R)); }) 5850309124Sdim 
5851309124Sdim#define _mm_getexp_round_ss(A, B, R) __extension__ ({ \ 5852309124Sdim (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5853309124Sdim (__v4sf)(__m128)(B), \ 5854309124Sdim (__v4sf)_mm_setzero_ps(), \ 5855309124Sdim (__mmask8)-1, (int)(R)); }) 5856309124Sdim 5857309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 5858309124Sdim_mm_getexp_ss (__m128 __A, __m128 __B) 5859309124Sdim{ 5860309124Sdim return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 5861309124Sdim (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); 5862309124Sdim} 5863309124Sdim 5864309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 5865309124Sdim_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 5866309124Sdim{ 5867309124Sdim return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 5868309124Sdim (__v4sf) __B, 5869309124Sdim (__v4sf) __W, 5870309124Sdim (__mmask8) __U, 5871309124Sdim _MM_FROUND_CUR_DIRECTION); 5872309124Sdim} 5873309124Sdim 5874309124Sdim#define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\ 5875309124Sdim (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5876309124Sdim (__v4sf)(__m128)(B), \ 5877309124Sdim (__v4sf)(__m128)(W), \ 5878309124Sdim (__mmask8)(U), (int)(R)); }) 5879309124Sdim 5880309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 5881309124Sdim_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) 5882309124Sdim{ 5883309124Sdim return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 5884309124Sdim (__v4sf) __B, 5885309124Sdim (__v4sf) _mm_setzero_pd (), 5886309124Sdim (__mmask8) __U, 5887309124Sdim _MM_FROUND_CUR_DIRECTION); 5888309124Sdim} 5889309124Sdim 5890309124Sdim#define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\ 5891309124Sdim (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5892309124Sdim (__v4sf)(__m128)(B), \ 5893309124Sdim (__v4sf)_mm_setzero_ps(), \ 
5894309124Sdim (__mmask8)(U), (int)(R)); }) 5895309124Sdim 5896309124Sdim#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \ 5897309124Sdim (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5898309124Sdim (__v2df)(__m128d)(B), \ 5899309124Sdim (int)(((D)<<2) | (C)), \ 5900309124Sdim (__v2df)_mm_setzero_pd(), \ 5901309124Sdim (__mmask8)-1, (int)(R)); }) 5902309124Sdim 5903309124Sdim#define _mm_getmant_sd(A, B, C, D) __extension__ ({ \ 5904309124Sdim (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5905309124Sdim (__v2df)(__m128d)(B), \ 5906309124Sdim (int)(((D)<<2) | (C)), \ 5907309124Sdim (__v2df)_mm_setzero_pd(), \ 5908309124Sdim (__mmask8)-1, \ 5909309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5910309124Sdim 5911309124Sdim#define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\ 5912309124Sdim (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5913309124Sdim (__v2df)(__m128d)(B), \ 5914309124Sdim (int)(((D)<<2) | (C)), \ 5915309124Sdim (__v2df)(__m128d)(W), \ 5916309124Sdim (__mmask8)(U), \ 5917309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5918309124Sdim 5919309124Sdim#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R)({\ 5920309124Sdim (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5921309124Sdim (__v2df)(__m128d)(B), \ 5922309124Sdim (int)(((D)<<2) | (C)), \ 5923309124Sdim (__v2df)(__m128d)(W), \ 5924309124Sdim (__mmask8)(U), (int)(R)); }) 5925309124Sdim 5926309124Sdim#define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\ 5927309124Sdim (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5928309124Sdim (__v2df)(__m128d)(B), \ 5929309124Sdim (int)(((D)<<2) | (C)), \ 5930309124Sdim (__v2df)_mm_setzero_pd(), \ 5931309124Sdim (__mmask8)(U), \ 5932309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5933309124Sdim 5934309124Sdim#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\ 5935309124Sdim 
(__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5936309124Sdim (__v2df)(__m128d)(B), \ 5937309124Sdim (int)(((D)<<2) | (C)), \ 5938309124Sdim (__v2df)_mm_setzero_pd(), \ 5939309124Sdim (__mmask8)(U), (int)(R)); }) 5940309124Sdim 5941309124Sdim#define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \ 5942309124Sdim (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5943309124Sdim (__v4sf)(__m128)(B), \ 5944309124Sdim (int)(((D)<<2) | (C)), \ 5945309124Sdim (__v4sf)_mm_setzero_ps(), \ 5946309124Sdim (__mmask8)-1, (int)(R)); }) 5947309124Sdim 5948309124Sdim#define _mm_getmant_ss(A, B, C, D) __extension__ ({ \ 5949309124Sdim (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5950309124Sdim (__v4sf)(__m128)(B), \ 5951309124Sdim (int)(((D)<<2) | (C)), \ 5952309124Sdim (__v4sf)_mm_setzero_ps(), \ 5953309124Sdim (__mmask8)-1, \ 5954309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5955309124Sdim 5956309124Sdim#define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\ 5957309124Sdim (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5958309124Sdim (__v4sf)(__m128)(B), \ 5959309124Sdim (int)(((D)<<2) | (C)), \ 5960309124Sdim (__v4sf)(__m128)(W), \ 5961309124Sdim (__mmask8)(U), \ 5962309124Sdim _MM_FROUND_CUR_DIRECTION); }) 5963309124Sdim 5964309124Sdim#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R)({\ 5965309124Sdim (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5966309124Sdim (__v4sf)(__m128)(B), \ 5967309124Sdim (int)(((D)<<2) | (C)), \ 5968309124Sdim (__v4sf)(__m128)(W), \ 5969309124Sdim (__mmask8)(U), (int)(R)); }) 5970309124Sdim 5971309124Sdim#define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\ 5972309124Sdim (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5973309124Sdim (__v4sf)(__m128)(B), \ 5974309124Sdim (int)(((D)<<2) | (C)), \ 5975309124Sdim (__v4sf)_mm_setzero_pd(), \ 5976309124Sdim (__mmask8)(U), \ 5977309124Sdim _MM_FROUND_CUR_DIRECTION); 
}) 5978309124Sdim 5979309124Sdim#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\ 5980309124Sdim (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5981309124Sdim (__v4sf)(__m128)(B), \ 5982309124Sdim (int)(((D)<<2) | (C)), \ 5983309124Sdim (__v4sf)_mm_setzero_ps(), \ 5984309124Sdim (__mmask8)(U), (int)(R)); }) 5985309124Sdim 5986309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 5987309124Sdim_mm512_kmov (__mmask16 __A) 5988309124Sdim{ 5989309124Sdim return __A; 5990309124Sdim} 5991309124Sdim 5992309124Sdim#define _mm_comi_round_sd(A, B, P, R) __extension__ ({\ 5993309124Sdim (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \ 5994309124Sdim (int)(P), (int)(R)); }) 5995309124Sdim 5996309124Sdim#define _mm_comi_round_ss(A, B, P, R) __extension__ ({\ 5997309124Sdim (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ 5998309124Sdim (int)(P), (int)(R)); }) 5999309124Sdim 6000314564Sdim#ifdef __x86_64__ 6001309124Sdim#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \ 6002309124Sdim (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); }) 6003314564Sdim#endif 6004309124Sdim 6005309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6006309124Sdim_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I, 6007309124Sdim __mmask16 __U, __m512i __B) 6008309124Sdim{ 6009309124Sdim return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A, 6010309124Sdim (__v16si) __I 6011309124Sdim /* idx */ , 6012309124Sdim (__v16si) __B, 6013309124Sdim (__mmask16) __U); 6014309124Sdim} 6015309124Sdim 6016309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6017314564Sdim_mm512_sll_epi32(__m512i __A, __m128i __B) 6018309124Sdim{ 6019314564Sdim return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B); 6020309124Sdim} 6021309124Sdim 6022309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6023314564Sdim_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, 
__m128i __B) 6024309124Sdim{ 6025314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6026314564Sdim (__v16si)_mm512_sll_epi32(__A, __B), 6027314564Sdim (__v16si)__W); 6028309124Sdim} 6029309124Sdim 6030309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6031314564Sdim_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) 6032309124Sdim{ 6033314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6034314564Sdim (__v16si)_mm512_sll_epi32(__A, __B), 6035314564Sdim (__v16si)_mm512_setzero_si512()); 6036309124Sdim} 6037309124Sdim 6038309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6039314564Sdim_mm512_sll_epi64(__m512i __A, __m128i __B) 6040309124Sdim{ 6041314564Sdim return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B); 6042309124Sdim} 6043309124Sdim 6044309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6045314564Sdim_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 6046309124Sdim{ 6047314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6048314564Sdim (__v8di)_mm512_sll_epi64(__A, __B), 6049314564Sdim (__v8di)__W); 6050309124Sdim} 6051309124Sdim 6052309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6053314564Sdim_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) 6054309124Sdim{ 6055314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6056314564Sdim (__v8di)_mm512_sll_epi64(__A, __B), 6057314564Sdim (__v8di)_mm512_setzero_si512()); 6058309124Sdim} 6059309124Sdim 6060309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6061314564Sdim_mm512_sllv_epi32(__m512i __X, __m512i __Y) 6062309124Sdim{ 6063314564Sdim return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y); 6064309124Sdim} 6065309124Sdim 6066309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6067314564Sdim_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 6068309124Sdim{ 6069314564Sdim return 
(__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6070314564Sdim (__v16si)_mm512_sllv_epi32(__X, __Y), 6071314564Sdim (__v16si)__W); 6072309124Sdim} 6073309124Sdim 6074309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6075314564Sdim_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) 6076309124Sdim{ 6077314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6078314564Sdim (__v16si)_mm512_sllv_epi32(__X, __Y), 6079314564Sdim (__v16si)_mm512_setzero_si512()); 6080309124Sdim} 6081309124Sdim 6082309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6083314564Sdim_mm512_sllv_epi64(__m512i __X, __m512i __Y) 6084309124Sdim{ 6085314564Sdim return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y); 6086309124Sdim} 6087309124Sdim 6088309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6089314564Sdim_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 6090309124Sdim{ 6091314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6092314564Sdim (__v8di)_mm512_sllv_epi64(__X, __Y), 6093314564Sdim (__v8di)__W); 6094309124Sdim} 6095309124Sdim 6096309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6097314564Sdim_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) 6098309124Sdim{ 6099314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6100314564Sdim (__v8di)_mm512_sllv_epi64(__X, __Y), 6101314564Sdim (__v8di)_mm512_setzero_si512()); 6102309124Sdim} 6103309124Sdim 6104309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6105314564Sdim_mm512_sra_epi32(__m512i __A, __m128i __B) 6106309124Sdim{ 6107314564Sdim return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B); 6108309124Sdim} 6109309124Sdim 6110309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6111314564Sdim_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 6112309124Sdim{ 6113314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6114314564Sdim 
(__v16si)_mm512_sra_epi32(__A, __B), 6115314564Sdim (__v16si)__W); 6116309124Sdim} 6117309124Sdim 6118309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6119314564Sdim_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) 6120309124Sdim{ 6121314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6122314564Sdim (__v16si)_mm512_sra_epi32(__A, __B), 6123314564Sdim (__v16si)_mm512_setzero_si512()); 6124309124Sdim} 6125309124Sdim 6126309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6127314564Sdim_mm512_sra_epi64(__m512i __A, __m128i __B) 6128309124Sdim{ 6129314564Sdim return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B); 6130309124Sdim} 6131309124Sdim 6132309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6133314564Sdim_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 6134309124Sdim{ 6135314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6136314564Sdim (__v8di)_mm512_sra_epi64(__A, __B), 6137314564Sdim (__v8di)__W); 6138309124Sdim} 6139309124Sdim 6140309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6141314564Sdim_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) 6142309124Sdim{ 6143314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6144314564Sdim (__v8di)_mm512_sra_epi64(__A, __B), 6145314564Sdim (__v8di)_mm512_setzero_si512()); 6146309124Sdim} 6147309124Sdim 6148309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6149314564Sdim_mm512_srav_epi32(__m512i __X, __m512i __Y) 6150309124Sdim{ 6151314564Sdim return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y); 6152309124Sdim} 6153309124Sdim 6154309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6155314564Sdim_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 6156309124Sdim{ 6157314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6158314564Sdim (__v16si)_mm512_srav_epi32(__X, __Y), 6159314564Sdim (__v16si)__W); 6160309124Sdim} 
6161309124Sdim 6162309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6163314564Sdim_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) 6164309124Sdim{ 6165314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6166314564Sdim (__v16si)_mm512_srav_epi32(__X, __Y), 6167314564Sdim (__v16si)_mm512_setzero_si512()); 6168309124Sdim} 6169309124Sdim 6170309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6171314564Sdim_mm512_srav_epi64(__m512i __X, __m512i __Y) 6172309124Sdim{ 6173314564Sdim return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y); 6174309124Sdim} 6175309124Sdim 6176309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6177314564Sdim_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 6178309124Sdim{ 6179314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6180314564Sdim (__v8di)_mm512_srav_epi64(__X, __Y), 6181314564Sdim (__v8di)__W); 6182309124Sdim} 6183309124Sdim 6184309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6185314564Sdim_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) 6186309124Sdim{ 6187314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6188314564Sdim (__v8di)_mm512_srav_epi64(__X, __Y), 6189314564Sdim (__v8di)_mm512_setzero_si512()); 6190309124Sdim} 6191309124Sdim 6192309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6193314564Sdim_mm512_srl_epi32(__m512i __A, __m128i __B) 6194309124Sdim{ 6195314564Sdim return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B); 6196309124Sdim} 6197309124Sdim 6198309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6199314564Sdim_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 6200309124Sdim{ 6201314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6202314564Sdim (__v16si)_mm512_srl_epi32(__A, __B), 6203314564Sdim (__v16si)__W); 6204309124Sdim} 6205309124Sdim 6206309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 
6207314564Sdim_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) 6208309124Sdim{ 6209314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6210314564Sdim (__v16si)_mm512_srl_epi32(__A, __B), 6211314564Sdim (__v16si)_mm512_setzero_si512()); 6212309124Sdim} 6213309124Sdim 6214309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6215314564Sdim_mm512_srl_epi64(__m512i __A, __m128i __B) 6216309124Sdim{ 6217314564Sdim return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B); 6218309124Sdim} 6219309124Sdim 6220309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6221314564Sdim_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 6222309124Sdim{ 6223314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6224314564Sdim (__v8di)_mm512_srl_epi64(__A, __B), 6225314564Sdim (__v8di)__W); 6226309124Sdim} 6227309124Sdim 6228309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6229314564Sdim_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) 6230309124Sdim{ 6231314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6232314564Sdim (__v8di)_mm512_srl_epi64(__A, __B), 6233314564Sdim (__v8di)_mm512_setzero_si512()); 6234309124Sdim} 6235309124Sdim 6236309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6237314564Sdim_mm512_srlv_epi32(__m512i __X, __m512i __Y) 6238309124Sdim{ 6239314564Sdim return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y); 6240309124Sdim} 6241309124Sdim 6242309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6243314564Sdim_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 6244309124Sdim{ 6245314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6246314564Sdim (__v16si)_mm512_srlv_epi32(__X, __Y), 6247314564Sdim (__v16si)__W); 6248309124Sdim} 6249309124Sdim 6250309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6251314564Sdim_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) 
6252309124Sdim{ 6253314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6254314564Sdim (__v16si)_mm512_srlv_epi32(__X, __Y), 6255314564Sdim (__v16si)_mm512_setzero_si512()); 6256309124Sdim} 6257309124Sdim 6258309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6259309124Sdim_mm512_srlv_epi64 (__m512i __X, __m512i __Y) 6260309124Sdim{ 6261314564Sdim return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y); 6262309124Sdim} 6263309124Sdim 6264309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6265314564Sdim_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 6266309124Sdim{ 6267314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6268314564Sdim (__v8di)_mm512_srlv_epi64(__X, __Y), 6269314564Sdim (__v8di)__W); 6270309124Sdim} 6271309124Sdim 6272309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6273314564Sdim_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) 6274309124Sdim{ 6275314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6276314564Sdim (__v8di)_mm512_srlv_epi64(__X, __Y), 6277314564Sdim (__v8di)_mm512_setzero_si512()); 6278309124Sdim} 6279309124Sdim 6280309124Sdim#define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ 6281309124Sdim (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ 6282309124Sdim (__v16si)(__m512i)(B), \ 6283309124Sdim (__v16si)(__m512i)(C), (int)(imm), \ 6284309124Sdim (__mmask16)-1); }) 6285309124Sdim 6286309124Sdim#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ 6287309124Sdim (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ 6288309124Sdim (__v16si)(__m512i)(B), \ 6289309124Sdim (__v16si)(__m512i)(C), (int)(imm), \ 6290309124Sdim (__mmask16)(U)); }) 6291309124Sdim 6292309124Sdim#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ 6293309124Sdim (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \ 6294309124Sdim 
(__v16si)(__m512i)(B), \ 6295309124Sdim (__v16si)(__m512i)(C), \ 6296309124Sdim (int)(imm), (__mmask16)(U)); }) 6297309124Sdim 6298309124Sdim#define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \ 6299309124Sdim (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ 6300309124Sdim (__v8di)(__m512i)(B), \ 6301309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 6302309124Sdim (__mmask8)-1); }) 6303309124Sdim 6304309124Sdim#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ 6305309124Sdim (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ 6306309124Sdim (__v8di)(__m512i)(B), \ 6307309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 6308309124Sdim (__mmask8)(U)); }) 6309309124Sdim 6310309124Sdim#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ 6311309124Sdim (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \ 6312309124Sdim (__v8di)(__m512i)(B), \ 6313309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 6314309124Sdim (__mmask8)(U)); }) 6315309124Sdim 6316314564Sdim#ifdef __x86_64__ 6317309124Sdim#define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \ 6318309124Sdim (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); }) 6319314564Sdim#endif 6320309124Sdim 6321309124Sdim#define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \ 6322309124Sdim (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); }) 6323309124Sdim 6324309124Sdim#define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \ 6325309124Sdim (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); }) 6326309124Sdim 6327309124Sdim#define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \ 6328309124Sdim (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); }) 6329309124Sdim 6330309124Sdimstatic __inline__ unsigned __DEFAULT_FN_ATTRS 6331309124Sdim_mm_cvtsd_u32 (__m128d __A) 6332309124Sdim{ 6333309124Sdim return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, 6334309124Sdim 
_MM_FROUND_CUR_DIRECTION); 6335309124Sdim} 6336309124Sdim 6337314564Sdim#ifdef __x86_64__ 6338309124Sdim#define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \ 6339309124Sdim (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \ 6340309124Sdim (int)(R)); }) 6341309124Sdim 6342309124Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 6343309124Sdim_mm_cvtsd_u64 (__m128d __A) 6344309124Sdim{ 6345309124Sdim return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) 6346309124Sdim __A, 6347309124Sdim _MM_FROUND_CUR_DIRECTION); 6348309124Sdim} 6349314564Sdim#endif 6350309124Sdim 6351309124Sdim#define _mm_cvt_roundss_si32(A, R) __extension__ ({ \ 6352309124Sdim (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); }) 6353309124Sdim 6354309124Sdim#define _mm_cvt_roundss_i32(A, R) __extension__ ({ \ 6355309124Sdim (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); }) 6356309124Sdim 6357314564Sdim#ifdef __x86_64__ 6358309124Sdim#define _mm_cvt_roundss_si64(A, R) __extension__ ({ \ 6359309124Sdim (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); }) 6360309124Sdim 6361309124Sdim#define _mm_cvt_roundss_i64(A, R) __extension__ ({ \ 6362309124Sdim (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); }) 6363314564Sdim#endif 6364309124Sdim 6365309124Sdim#define _mm_cvt_roundss_u32(A, R) __extension__ ({ \ 6366309124Sdim (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); }) 6367309124Sdim 6368309124Sdimstatic __inline__ unsigned __DEFAULT_FN_ATTRS 6369309124Sdim_mm_cvtss_u32 (__m128 __A) 6370309124Sdim{ 6371309124Sdim return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, 6372309124Sdim _MM_FROUND_CUR_DIRECTION); 6373309124Sdim} 6374309124Sdim 6375314564Sdim#ifdef __x86_64__ 6376309124Sdim#define _mm_cvt_roundss_u64(A, R) __extension__ ({ \ 6377309124Sdim (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \ 6378309124Sdim (int)(R)); }) 
6379309124Sdim 6380309124Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 6381309124Sdim_mm_cvtss_u64 (__m128 __A) 6382309124Sdim{ 6383309124Sdim return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) 6384309124Sdim __A, 6385309124Sdim _MM_FROUND_CUR_DIRECTION); 6386309124Sdim} 6387314564Sdim#endif 6388309124Sdim 6389309124Sdim#define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \ 6390309124Sdim (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); }) 6391309124Sdim 6392309124Sdim#define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \ 6393309124Sdim (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); }) 6394309124Sdim 6395309124Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 6396309124Sdim_mm_cvttsd_i32 (__m128d __A) 6397309124Sdim{ 6398309124Sdim return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, 6399309124Sdim _MM_FROUND_CUR_DIRECTION); 6400309124Sdim} 6401309124Sdim 6402314564Sdim#ifdef __x86_64__ 6403309124Sdim#define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \ 6404309124Sdim (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); }) 6405309124Sdim 6406309124Sdim#define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \ 6407309124Sdim (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); }) 6408309124Sdim 6409309124Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 6410309124Sdim_mm_cvttsd_i64 (__m128d __A) 6411309124Sdim{ 6412309124Sdim return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, 6413309124Sdim _MM_FROUND_CUR_DIRECTION); 6414309124Sdim} 6415314564Sdim#endif 6416309124Sdim 6417309124Sdim#define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \ 6418309124Sdim (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); }) 6419309124Sdim 6420309124Sdimstatic __inline__ unsigned __DEFAULT_FN_ATTRS 6421309124Sdim_mm_cvttsd_u32 (__m128d __A) 6422309124Sdim{ 6423309124Sdim return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, 6424309124Sdim 
_MM_FROUND_CUR_DIRECTION); 6425309124Sdim} 6426309124Sdim 6427314564Sdim#ifdef __x86_64__ 6428309124Sdim#define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \ 6429309124Sdim (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \ 6430309124Sdim (int)(R)); }) 6431309124Sdim 6432309124Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 6433309124Sdim_mm_cvttsd_u64 (__m128d __A) 6434309124Sdim{ 6435309124Sdim return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) 6436309124Sdim __A, 6437309124Sdim _MM_FROUND_CUR_DIRECTION); 6438309124Sdim} 6439314564Sdim#endif 6440309124Sdim 6441309124Sdim#define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \ 6442309124Sdim (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); }) 6443309124Sdim 6444309124Sdim#define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \ 6445309124Sdim (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); }) 6446309124Sdim 6447309124Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 6448309124Sdim_mm_cvttss_i32 (__m128 __A) 6449309124Sdim{ 6450309124Sdim return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, 6451309124Sdim _MM_FROUND_CUR_DIRECTION); 6452309124Sdim} 6453309124Sdim 6454314564Sdim#ifdef __x86_64__ 6455309124Sdim#define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \ 6456309124Sdim (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); }) 6457309124Sdim 6458309124Sdim#define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \ 6459309124Sdim (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); }) 6460309124Sdim 6461309124Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 6462309124Sdim_mm_cvttss_i64 (__m128 __A) 6463309124Sdim{ 6464309124Sdim return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, 6465309124Sdim _MM_FROUND_CUR_DIRECTION); 6466309124Sdim} 6467314564Sdim#endif 6468309124Sdim 6469309124Sdim#define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \ 6470309124Sdim (unsigned 
int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); }) 6471309124Sdim 6472309124Sdimstatic __inline__ unsigned __DEFAULT_FN_ATTRS 6473309124Sdim_mm_cvttss_u32 (__m128 __A) 6474309124Sdim{ 6475309124Sdim return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, 6476309124Sdim _MM_FROUND_CUR_DIRECTION); 6477309124Sdim} 6478309124Sdim 6479314564Sdim#ifdef __x86_64__ 6480309124Sdim#define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \ 6481309124Sdim (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \ 6482309124Sdim (int)(R)); }) 6483309124Sdim 6484309124Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 6485309124Sdim_mm_cvttss_u64 (__m128 __A) 6486309124Sdim{ 6487309124Sdim return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) 6488309124Sdim __A, 6489309124Sdim _MM_FROUND_CUR_DIRECTION); 6490309124Sdim} 6491314564Sdim#endif 6492309124Sdim 6493309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 6494309124Sdim_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U, 6495309124Sdim __m512d __B) 6496309124Sdim{ 6497309124Sdim return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A, 6498309124Sdim (__v8di) __I 6499309124Sdim /* idx */ , 6500309124Sdim (__v8df) __B, 6501309124Sdim (__mmask8) __U); 6502309124Sdim} 6503309124Sdim 6504309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 6505309124Sdim_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U, 6506309124Sdim __m512 __B) 6507309124Sdim{ 6508309124Sdim return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A, 6509309124Sdim (__v16si) __I 6510309124Sdim /* idx */ , 6511309124Sdim (__v16sf) __B, 6512309124Sdim (__mmask16) __U); 6513309124Sdim} 6514309124Sdim 6515309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 6516309124Sdim_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I, 6517309124Sdim __mmask8 __U, __m512i __B) 6518309124Sdim{ 6519309124Sdim return (__m512i) __builtin_ia32_vpermi2varq512_mask 
((__v8di) __A, 6520309124Sdim (__v8di) __I 6521309124Sdim /* idx */ , 6522309124Sdim (__v8di) __B, 6523309124Sdim (__mmask8) __U); 6524309124Sdim} 6525309124Sdim 6526309124Sdim#define _mm512_permute_pd(X, C) __extension__ ({ \ 6527309124Sdim (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ 6528309124Sdim (__v8df)_mm512_undefined_pd(), \ 6529309124Sdim 0 + (((C) >> 0) & 0x1), \ 6530309124Sdim 0 + (((C) >> 1) & 0x1), \ 6531309124Sdim 2 + (((C) >> 2) & 0x1), \ 6532309124Sdim 2 + (((C) >> 3) & 0x1), \ 6533309124Sdim 4 + (((C) >> 4) & 0x1), \ 6534309124Sdim 4 + (((C) >> 5) & 0x1), \ 6535309124Sdim 6 + (((C) >> 6) & 0x1), \ 6536309124Sdim 6 + (((C) >> 7) & 0x1)); }) 6537309124Sdim 6538309124Sdim#define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \ 6539309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6540309124Sdim (__v8df)_mm512_permute_pd((X), (C)), \ 6541309124Sdim (__v8df)(__m512d)(W)); }) 6542309124Sdim 6543309124Sdim#define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \ 6544309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6545309124Sdim (__v8df)_mm512_permute_pd((X), (C)), \ 6546309124Sdim (__v8df)_mm512_setzero_pd()); }) 6547309124Sdim 6548309124Sdim#define _mm512_permute_ps(X, C) __extension__ ({ \ 6549309124Sdim (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \ 6550309124Sdim (__v16sf)_mm512_undefined_ps(), \ 6551309124Sdim 0 + (((C) >> 0) & 0x3), \ 6552309124Sdim 0 + (((C) >> 2) & 0x3), \ 6553309124Sdim 0 + (((C) >> 4) & 0x3), \ 6554309124Sdim 0 + (((C) >> 6) & 0x3), \ 6555309124Sdim 4 + (((C) >> 0) & 0x3), \ 6556309124Sdim 4 + (((C) >> 2) & 0x3), \ 6557309124Sdim 4 + (((C) >> 4) & 0x3), \ 6558309124Sdim 4 + (((C) >> 6) & 0x3), \ 6559309124Sdim 8 + (((C) >> 0) & 0x3), \ 6560309124Sdim 8 + (((C) >> 2) & 0x3), \ 6561309124Sdim 8 + (((C) >> 4) & 0x3), \ 6562309124Sdim 8 + (((C) >> 6) & 0x3), \ 6563309124Sdim 12 + (((C) >> 0) & 0x3), \ 6564309124Sdim 12 + (((C) >> 2) & 0x3), \ 6565309124Sdim 12 + (((C) >> 4) 
& 0x3), \ 6566309124Sdim 12 + (((C) >> 6) & 0x3)); }) 6567309124Sdim 6568309124Sdim#define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \ 6569309124Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6570309124Sdim (__v16sf)_mm512_permute_ps((X), (C)), \ 6571309124Sdim (__v16sf)(__m512)(W)); }) 6572309124Sdim 6573309124Sdim#define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \ 6574309124Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6575309124Sdim (__v16sf)_mm512_permute_ps((X), (C)), \ 6576309124Sdim (__v16sf)_mm512_setzero_ps()); }) 6577309124Sdim 6578309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 6579314564Sdim_mm512_permutevar_pd(__m512d __A, __m512i __C) 6580309124Sdim{ 6581314564Sdim return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C); 6582309124Sdim} 6583309124Sdim 6584309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 6585314564Sdim_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) 6586309124Sdim{ 6587314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 6588314564Sdim (__v8df)_mm512_permutevar_pd(__A, __C), 6589314564Sdim (__v8df)__W); 6590309124Sdim} 6591309124Sdim 6592309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 6593314564Sdim_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) 6594309124Sdim{ 6595314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 6596314564Sdim (__v8df)_mm512_permutevar_pd(__A, __C), 6597314564Sdim (__v8df)_mm512_setzero_pd()); 6598309124Sdim} 6599309124Sdim 6600309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 6601314564Sdim_mm512_permutevar_ps(__m512 __A, __m512i __C) 6602309124Sdim{ 6603314564Sdim return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C); 6604309124Sdim} 6605309124Sdim 6606309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 6607314564Sdim_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) 6608309124Sdim{ 6609314564Sdim 
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 6610314564Sdim (__v16sf)_mm512_permutevar_ps(__A, __C), 6611314564Sdim (__v16sf)__W); 6612309124Sdim} 6613309124Sdim 6614309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 6615314564Sdim_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) 6616309124Sdim{ 6617314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 6618314564Sdim (__v16sf)_mm512_permutevar_ps(__A, __C), 6619314564Sdim (__v16sf)_mm512_setzero_ps()); 6620309124Sdim} 6621309124Sdim 6622309124Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS 6623309124Sdim_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) 6624309124Sdim{ 6625309124Sdim return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I 6626309124Sdim /* idx */ , 6627309124Sdim (__v8df) __A, 6628309124Sdim (__v8df) __B, 6629309124Sdim (__mmask8) -1); 6630309124Sdim} 6631309124Sdim 6632309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 6633309124Sdim_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B) 6634309124Sdim{ 6635309124Sdim return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I 6636309124Sdim /* idx */ , 6637309124Sdim (__v8df) __A, 6638309124Sdim (__v8df) __B, 6639309124Sdim (__mmask8) __U); 6640309124Sdim} 6641309124Sdim 6642309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 6643309124Sdim_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I, 6644309124Sdim __m512d __B) 6645309124Sdim{ 6646309124Sdim return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I 6647309124Sdim /* idx */ , 6648309124Sdim (__v8df) __A, 6649309124Sdim (__v8df) __B, 6650309124Sdim (__mmask8) __U); 6651309124Sdim} 6652309124Sdim 6653309124Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS 6654309124Sdim_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) 6655309124Sdim{ 6656309124Sdim return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I 6657309124Sdim /* idx */ , 
6658309124Sdim (__v16sf) __A, 6659309124Sdim (__v16sf) __B, 6660309124Sdim (__mmask16) -1); 6661309124Sdim} 6662309124Sdim 6663309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 6664309124Sdim_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) 6665309124Sdim{ 6666309124Sdim return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I 6667309124Sdim /* idx */ , 6668309124Sdim (__v16sf) __A, 6669309124Sdim (__v16sf) __B, 6670309124Sdim (__mmask16) __U); 6671309124Sdim} 6672309124Sdim 6673309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 6674309124Sdim_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I, 6675309124Sdim __m512 __B) 6676309124Sdim{ 6677309124Sdim return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I 6678309124Sdim /* idx */ , 6679309124Sdim (__v16sf) __A, 6680309124Sdim (__v16sf) __B, 6681309124Sdim (__mmask16) __U); 6682309124Sdim} 6683309124Sdim 6684309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 6685309124Sdim_mm512_testn_epi32_mask (__m512i __A, __m512i __B) 6686309124Sdim{ 6687309124Sdim return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, 6688309124Sdim (__v16si) __B, 6689309124Sdim (__mmask16) -1); 6690309124Sdim} 6691309124Sdim 6692309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 6693309124Sdim_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 6694309124Sdim{ 6695309124Sdim return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, 6696309124Sdim (__v16si) __B, __U); 6697309124Sdim} 6698309124Sdim 6699309124Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 6700309124Sdim_mm512_testn_epi64_mask (__m512i __A, __m512i __B) 6701309124Sdim{ 6702309124Sdim return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, 6703309124Sdim (__v8di) __B, 6704309124Sdim (__mmask8) -1); 6705309124Sdim} 6706309124Sdim 6707309124Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS 6708309124Sdim_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, 
__m512i __B) 6709309124Sdim{ 6710309124Sdim return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, 6711309124Sdim (__v8di) __B, __U); 6712309124Sdim} 6713309124Sdim 6714309124Sdim#define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \ 6715309124Sdim (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6716309124Sdim (__v8si)_mm256_undefined_si256(), \ 6717309124Sdim (__mmask8)-1, (int)(R)); }) 6718309124Sdim 6719309124Sdim#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \ 6720309124Sdim (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6721309124Sdim (__v8si)(__m256i)(W), \ 6722309124Sdim (__mmask8)(U), (int)(R)); }) 6723309124Sdim 6724309124Sdim#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \ 6725309124Sdim (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6726309124Sdim (__v8si)_mm256_setzero_si256(), \ 6727309124Sdim (__mmask8)(U), (int)(R)); }) 6728309124Sdim 6729309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 6730309124Sdim_mm512_cvttpd_epu32 (__m512d __A) 6731309124Sdim{ 6732309124Sdim return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 6733309124Sdim (__v8si) 6734309124Sdim _mm256_undefined_si256 (), 6735309124Sdim (__mmask8) -1, 6736309124Sdim _MM_FROUND_CUR_DIRECTION); 6737309124Sdim} 6738309124Sdim 6739309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 6740309124Sdim_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 6741309124Sdim{ 6742309124Sdim return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 6743309124Sdim (__v8si) __W, 6744309124Sdim (__mmask8) __U, 6745309124Sdim _MM_FROUND_CUR_DIRECTION); 6746309124Sdim} 6747309124Sdim 6748309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 6749309124Sdim_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) 6750309124Sdim{ 6751309124Sdim return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 6752309124Sdim (__v8si) 6753309124Sdim _mm256_setzero_si256 (), 
6754309124Sdim (__mmask8) __U, 6755309124Sdim _MM_FROUND_CUR_DIRECTION); 6756309124Sdim} 6757309124Sdim 6758309124Sdim#define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \ 6759309124Sdim (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6760309124Sdim (__v2df)(__m128d)(B), \ 6761309124Sdim (__v2df)_mm_setzero_pd(), \ 6762309124Sdim (__mmask8)-1, (int)(imm), \ 6763309124Sdim (int)(R)); }) 6764309124Sdim 6765309124Sdim#define _mm_roundscale_sd(A, B, imm) __extension__ ({ \ 6766309124Sdim (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6767309124Sdim (__v2df)(__m128d)(B), \ 6768309124Sdim (__v2df)_mm_setzero_pd(), \ 6769309124Sdim (__mmask8)-1, (int)(imm), \ 6770309124Sdim _MM_FROUND_CUR_DIRECTION); }) 6771309124Sdim 6772309124Sdim#define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \ 6773309124Sdim (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6774309124Sdim (__v2df)(__m128d)(B), \ 6775309124Sdim (__v2df)(__m128d)(W), \ 6776309124Sdim (__mmask8)(U), (int)(imm), \ 6777309124Sdim _MM_FROUND_CUR_DIRECTION); }) 6778309124Sdim 6779309124Sdim#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \ 6780309124Sdim (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6781309124Sdim (__v2df)(__m128d)(B), \ 6782309124Sdim (__v2df)(__m128d)(W), \ 6783309124Sdim (__mmask8)(U), (int)(I), \ 6784309124Sdim (int)(R)); }) 6785309124Sdim 6786309124Sdim#define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \ 6787309124Sdim (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6788309124Sdim (__v2df)(__m128d)(B), \ 6789309124Sdim (__v2df)_mm_setzero_pd(), \ 6790309124Sdim (__mmask8)(U), (int)(I), \ 6791309124Sdim _MM_FROUND_CUR_DIRECTION); }) 6792309124Sdim 6793309124Sdim#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \ 6794309124Sdim (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6795309124Sdim 
(__v2df)(__m128d)(B), \ 6796309124Sdim (__v2df)_mm_setzero_pd(), \ 6797309124Sdim (__mmask8)(U), (int)(I), \ 6798309124Sdim (int)(R)); }) 6799309124Sdim 6800309124Sdim#define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \ 6801309124Sdim (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6802309124Sdim (__v4sf)(__m128)(B), \ 6803309124Sdim (__v4sf)_mm_setzero_ps(), \ 6804309124Sdim (__mmask8)-1, (int)(imm), \ 6805309124Sdim (int)(R)); }) 6806309124Sdim 6807309124Sdim#define _mm_roundscale_ss(A, B, imm) __extension__ ({ \ 6808309124Sdim (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6809309124Sdim (__v4sf)(__m128)(B), \ 6810309124Sdim (__v4sf)_mm_setzero_ps(), \ 6811309124Sdim (__mmask8)-1, (int)(imm), \ 6812309124Sdim _MM_FROUND_CUR_DIRECTION); }) 6813309124Sdim 6814309124Sdim#define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \ 6815309124Sdim (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6816309124Sdim (__v4sf)(__m128)(B), \ 6817309124Sdim (__v4sf)(__m128)(W), \ 6818309124Sdim (__mmask8)(U), (int)(I), \ 6819309124Sdim _MM_FROUND_CUR_DIRECTION); }) 6820309124Sdim 6821309124Sdim#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \ 6822309124Sdim (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6823309124Sdim (__v4sf)(__m128)(B), \ 6824309124Sdim (__v4sf)(__m128)(W), \ 6825309124Sdim (__mmask8)(U), (int)(I), \ 6826309124Sdim (int)(R)); }) 6827309124Sdim 6828309124Sdim#define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \ 6829309124Sdim (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6830309124Sdim (__v4sf)(__m128)(B), \ 6831309124Sdim (__v4sf)_mm_setzero_ps(), \ 6832309124Sdim (__mmask8)(U), (int)(I), \ 6833309124Sdim _MM_FROUND_CUR_DIRECTION); }) 6834309124Sdim 6835309124Sdim#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \ 6836309124Sdim 
(__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6837309124Sdim (__v4sf)(__m128)(B), \ 6838309124Sdim (__v4sf)_mm_setzero_ps(), \ 6839309124Sdim (__mmask8)(U), (int)(I), \ 6840309124Sdim (int)(R)); }) 6841309124Sdim 6842309124Sdim#define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \ 6843309124Sdim (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6844309124Sdim (__v8df)(__m512d)(B), \ 6845309124Sdim (__v8df)_mm512_undefined_pd(), \ 6846309124Sdim (__mmask8)-1, (int)(R)); }) 6847309124Sdim 6848309124Sdim#define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \ 6849309124Sdim (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6850309124Sdim (__v8df)(__m512d)(B), \ 6851309124Sdim (__v8df)(__m512d)(W), \ 6852309124Sdim (__mmask8)(U), (int)(R)); }) 6853309124Sdim 6854309124Sdim#define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \ 6855309124Sdim (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6856309124Sdim (__v8df)(__m512d)(B), \ 6857309124Sdim (__v8df)_mm512_setzero_pd(), \ 6858309124Sdim (__mmask8)(U), (int)(R)); }) 6859309124Sdim 6860309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 6861309124Sdim_mm512_scalef_pd (__m512d __A, __m512d __B) 6862309124Sdim{ 6863309124Sdim return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 6864309124Sdim (__v8df) __B, 6865309124Sdim (__v8df) 6866309124Sdim _mm512_undefined_pd (), 6867309124Sdim (__mmask8) -1, 6868309124Sdim _MM_FROUND_CUR_DIRECTION); 6869309124Sdim} 6870309124Sdim 6871309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 6872309124Sdim_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 6873309124Sdim{ 6874309124Sdim return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 6875309124Sdim (__v8df) __B, 6876309124Sdim (__v8df) __W, 6877309124Sdim (__mmask8) __U, 6878309124Sdim _MM_FROUND_CUR_DIRECTION); 6879309124Sdim} 6880309124Sdim 6881309124Sdimstatic __inline__ __m512d 
__DEFAULT_FN_ATTRS 6882309124Sdim_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) 6883309124Sdim{ 6884309124Sdim return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 6885309124Sdim (__v8df) __B, 6886309124Sdim (__v8df) 6887309124Sdim _mm512_setzero_pd (), 6888309124Sdim (__mmask8) __U, 6889309124Sdim _MM_FROUND_CUR_DIRECTION); 6890309124Sdim} 6891309124Sdim 6892309124Sdim#define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \ 6893309124Sdim (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6894309124Sdim (__v16sf)(__m512)(B), \ 6895309124Sdim (__v16sf)_mm512_undefined_ps(), \ 6896309124Sdim (__mmask16)-1, (int)(R)); }) 6897309124Sdim 6898309124Sdim#define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \ 6899309124Sdim (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6900309124Sdim (__v16sf)(__m512)(B), \ 6901309124Sdim (__v16sf)(__m512)(W), \ 6902309124Sdim (__mmask16)(U), (int)(R)); }) 6903309124Sdim 6904309124Sdim#define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \ 6905309124Sdim (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6906309124Sdim (__v16sf)(__m512)(B), \ 6907309124Sdim (__v16sf)_mm512_setzero_ps(), \ 6908309124Sdim (__mmask16)(U), (int)(R)); }) 6909309124Sdim 6910309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 6911309124Sdim_mm512_scalef_ps (__m512 __A, __m512 __B) 6912309124Sdim{ 6913309124Sdim return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 6914309124Sdim (__v16sf) __B, 6915309124Sdim (__v16sf) 6916309124Sdim _mm512_undefined_ps (), 6917309124Sdim (__mmask16) -1, 6918309124Sdim _MM_FROUND_CUR_DIRECTION); 6919309124Sdim} 6920309124Sdim 6921309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 6922309124Sdim_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 6923309124Sdim{ 6924309124Sdim return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 6925309124Sdim (__v16sf) __B, 6926309124Sdim (__v16sf) 
__W, 6927309124Sdim (__mmask16) __U, 6928309124Sdim _MM_FROUND_CUR_DIRECTION); 6929309124Sdim} 6930309124Sdim 6931309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 6932309124Sdim_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) 6933309124Sdim{ 6934309124Sdim return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 6935309124Sdim (__v16sf) __B, 6936309124Sdim (__v16sf) 6937309124Sdim _mm512_setzero_ps (), 6938309124Sdim (__mmask16) __U, 6939309124Sdim _MM_FROUND_CUR_DIRECTION); 6940309124Sdim} 6941309124Sdim 6942309124Sdim#define _mm_scalef_round_sd(A, B, R) __extension__ ({ \ 6943309124Sdim (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6944309124Sdim (__v2df)(__m128d)(B), \ 6945309124Sdim (__v2df)_mm_setzero_pd(), \ 6946309124Sdim (__mmask8)-1, (int)(R)); }) 6947309124Sdim 6948309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 6949309124Sdim_mm_scalef_sd (__m128d __A, __m128d __B) 6950309124Sdim{ 6951309124Sdim return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A, 6952309124Sdim (__v2df)( __B), (__v2df) _mm_setzero_pd(), 6953309124Sdim (__mmask8) -1, 6954309124Sdim _MM_FROUND_CUR_DIRECTION); 6955309124Sdim} 6956309124Sdim 6957309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 6958309124Sdim_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 6959309124Sdim{ 6960309124Sdim return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, 6961309124Sdim (__v2df) __B, 6962309124Sdim (__v2df) __W, 6963309124Sdim (__mmask8) __U, 6964309124Sdim _MM_FROUND_CUR_DIRECTION); 6965309124Sdim} 6966309124Sdim 6967309124Sdim#define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \ 6968309124Sdim (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6969309124Sdim (__v2df)(__m128d)(B), \ 6970309124Sdim (__v2df)(__m128d)(W), \ 6971309124Sdim (__mmask8)(U), (int)(R)); }) 6972309124Sdim 6973309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 
6974309124Sdim_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) 6975309124Sdim{ 6976309124Sdim return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, 6977309124Sdim (__v2df) __B, 6978309124Sdim (__v2df) _mm_setzero_pd (), 6979309124Sdim (__mmask8) __U, 6980309124Sdim _MM_FROUND_CUR_DIRECTION); 6981309124Sdim} 6982309124Sdim 6983309124Sdim#define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \ 6984309124Sdim (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6985309124Sdim (__v2df)(__m128d)(B), \ 6986309124Sdim (__v2df)_mm_setzero_pd(), \ 6987309124Sdim (__mmask8)(U), (int)(R)); }) 6988309124Sdim 6989309124Sdim#define _mm_scalef_round_ss(A, B, R) __extension__ ({ \ 6990309124Sdim (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6991309124Sdim (__v4sf)(__m128)(B), \ 6992309124Sdim (__v4sf)_mm_setzero_ps(), \ 6993309124Sdim (__mmask8)-1, (int)(R)); }) 6994309124Sdim 6995309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 6996309124Sdim_mm_scalef_ss (__m128 __A, __m128 __B) 6997309124Sdim{ 6998309124Sdim return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A, 6999309124Sdim (__v4sf)( __B), (__v4sf) _mm_setzero_ps(), 7000309124Sdim (__mmask8) -1, 7001309124Sdim _MM_FROUND_CUR_DIRECTION); 7002309124Sdim} 7003309124Sdim 7004309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 7005309124Sdim_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 7006309124Sdim{ 7007309124Sdim return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, 7008309124Sdim (__v4sf) __B, 7009309124Sdim (__v4sf) __W, 7010309124Sdim (__mmask8) __U, 7011309124Sdim _MM_FROUND_CUR_DIRECTION); 7012309124Sdim} 7013309124Sdim 7014309124Sdim#define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \ 7015309124Sdim (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 7016309124Sdim (__v4sf)(__m128)(B), \ 7017309124Sdim (__v4sf)(__m128)(W), \ 7018309124Sdim (__mmask8)(U), (int)(R)); }) 
7019309124Sdim 7020309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 7021309124Sdim_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) 7022309124Sdim{ 7023309124Sdim return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, 7024309124Sdim (__v4sf) __B, 7025309124Sdim (__v4sf) _mm_setzero_ps (), 7026309124Sdim (__mmask8) __U, 7027309124Sdim _MM_FROUND_CUR_DIRECTION); 7028309124Sdim} 7029309124Sdim 7030309124Sdim#define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \ 7031309124Sdim (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 7032309124Sdim (__v4sf)(__m128)(B), \ 7033309124Sdim (__v4sf)_mm_setzero_ps(), \ 7034309124Sdim (__mmask8)(U), \ 7035309124Sdim _MM_FROUND_CUR_DIRECTION); }) 7036309124Sdim 7037314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7038314564Sdim_mm512_srai_epi32(__m512i __A, int __B) 7039314564Sdim{ 7040314564Sdim return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B); 7041314564Sdim} 7042309124Sdim 7043314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7044314564Sdim_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) 7045314564Sdim{ 7046314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ 7047314564Sdim (__v16si)_mm512_srai_epi32(__A, __B), \ 7048314564Sdim (__v16si)__W); 7049314564Sdim} 7050309124Sdim 7051314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7052314564Sdim_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) { 7053314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ 7054314564Sdim (__v16si)_mm512_srai_epi32(__A, __B), \ 7055314564Sdim (__v16si)_mm512_setzero_si512()); 7056314564Sdim} 7057309124Sdim 7058314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7059314564Sdim_mm512_srai_epi64(__m512i __A, int __B) 7060314564Sdim{ 7061314564Sdim return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B); 7062314564Sdim} 7063309124Sdim 7064314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 
7065314564Sdim_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) 7066314564Sdim{ 7067314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ 7068314564Sdim (__v8di)_mm512_srai_epi64(__A, __B), \ 7069314564Sdim (__v8di)__W); 7070314564Sdim} 7071309124Sdim 7072314564Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7073314564Sdim_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B) 7074314564Sdim{ 7075314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ 7076314564Sdim (__v8di)_mm512_srai_epi64(__A, __B), \ 7077314564Sdim (__v8di)_mm512_setzero_si512()); 7078314564Sdim} 7079309124Sdim 7080309124Sdim#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \ 7081309124Sdim (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \ 7082309124Sdim (__v16sf)(__m512)(B), (int)(imm), \ 7083309124Sdim (__v16sf)_mm512_undefined_ps(), \ 7084309124Sdim (__mmask16)-1); }) 7085309124Sdim 7086309124Sdim#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \ 7087309124Sdim (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \ 7088309124Sdim (__v16sf)(__m512)(B), (int)(imm), \ 7089309124Sdim (__v16sf)(__m512)(W), \ 7090309124Sdim (__mmask16)(U)); }) 7091309124Sdim 7092309124Sdim#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \ 7093309124Sdim (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \ 7094309124Sdim (__v16sf)(__m512)(B), (int)(imm), \ 7095309124Sdim (__v16sf)_mm512_setzero_ps(), \ 7096309124Sdim (__mmask16)(U)); }) 7097309124Sdim 7098309124Sdim#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \ 7099309124Sdim (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \ 7100309124Sdim (__v8df)(__m512d)(B), (int)(imm), \ 7101309124Sdim (__v8df)_mm512_undefined_pd(), \ 7102309124Sdim (__mmask8)-1); }) 7103309124Sdim 7104309124Sdim#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \ 7105309124Sdim 
(__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \ 7106309124Sdim (__v8df)(__m512d)(B), (int)(imm), \ 7107309124Sdim (__v8df)(__m512d)(W), \ 7108309124Sdim (__mmask8)(U)); }) 7109309124Sdim 7110309124Sdim#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \ 7111309124Sdim (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \ 7112309124Sdim (__v8df)(__m512d)(B), (int)(imm), \ 7113309124Sdim (__v8df)_mm512_setzero_pd(), \ 7114309124Sdim (__mmask8)(U)); }) 7115309124Sdim 7116309124Sdim#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \ 7117309124Sdim (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \ 7118309124Sdim (__v16si)(__m512i)(B), (int)(imm), \ 7119309124Sdim (__v16si)_mm512_setzero_si512(), \ 7120309124Sdim (__mmask16)-1); }) 7121309124Sdim 7122309124Sdim#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \ 7123309124Sdim (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \ 7124309124Sdim (__v16si)(__m512i)(B), (int)(imm), \ 7125309124Sdim (__v16si)(__m512i)(W), \ 7126309124Sdim (__mmask16)(U)); }) 7127309124Sdim 7128309124Sdim#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \ 7129309124Sdim (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \ 7130309124Sdim (__v16si)(__m512i)(B), (int)(imm), \ 7131309124Sdim (__v16si)_mm512_setzero_si512(), \ 7132309124Sdim (__mmask16)(U)); }) 7133309124Sdim 7134309124Sdim#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \ 7135309124Sdim (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \ 7136309124Sdim (__v8di)(__m512i)(B), (int)(imm), \ 7137309124Sdim (__v8di)_mm512_setzero_si512(), \ 7138309124Sdim (__mmask8)-1); }) 7139309124Sdim 7140309124Sdim#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \ 7141309124Sdim (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \ 7142309124Sdim (__v8di)(__m512i)(B), (int)(imm), \ 7143309124Sdim (__v8di)(__m512i)(W), \ 
7144309124Sdim (__mmask8)(U)); }) 7145309124Sdim 7146309124Sdim#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \ 7147309124Sdim (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \ 7148309124Sdim (__v8di)(__m512i)(B), (int)(imm), \ 7149309124Sdim (__v8di)_mm512_setzero_si512(), \ 7150309124Sdim (__mmask8)(U)); }) 7151309124Sdim 7152309124Sdim#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \ 7153309124Sdim (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ 7154309124Sdim (__v8df)(__m512d)(B), \ 7155309124Sdim 0 + (((M) >> 0) & 0x1), \ 7156309124Sdim 8 + (((M) >> 1) & 0x1), \ 7157309124Sdim 2 + (((M) >> 2) & 0x1), \ 7158309124Sdim 10 + (((M) >> 3) & 0x1), \ 7159309124Sdim 4 + (((M) >> 4) & 0x1), \ 7160309124Sdim 12 + (((M) >> 5) & 0x1), \ 7161309124Sdim 6 + (((M) >> 6) & 0x1), \ 7162309124Sdim 14 + (((M) >> 7) & 0x1)); }) 7163309124Sdim 7164309124Sdim#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ 7165309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7166309124Sdim (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ 7167309124Sdim (__v8df)(__m512d)(W)); }) 7168309124Sdim 7169309124Sdim#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ 7170309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7171309124Sdim (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ 7172309124Sdim (__v8df)_mm512_setzero_pd()); }) 7173309124Sdim 7174309124Sdim#define _mm512_shuffle_ps(A, B, M) __extension__ ({ \ 7175309124Sdim (__m512d)__builtin_shufflevector((__v16sf)(__m512)(A), \ 7176309124Sdim (__v16sf)(__m512)(B), \ 7177309124Sdim 0 + (((M) >> 0) & 0x3), \ 7178309124Sdim 0 + (((M) >> 2) & 0x3), \ 7179309124Sdim 16 + (((M) >> 4) & 0x3), \ 7180309124Sdim 16 + (((M) >> 6) & 0x3), \ 7181309124Sdim 4 + (((M) >> 0) & 0x3), \ 7182309124Sdim 4 + (((M) >> 2) & 0x3), \ 7183309124Sdim 20 + (((M) >> 4) & 0x3), \ 7184309124Sdim 20 + (((M) >> 6) & 0x3), \ 7185309124Sdim 8 + (((M) >> 0) & 0x3), \ 7186309124Sdim 8 + (((M) >> 2) 
& 0x3), \ 7187309124Sdim 24 + (((M) >> 4) & 0x3), \ 7188309124Sdim 24 + (((M) >> 6) & 0x3), \ 7189309124Sdim 12 + (((M) >> 0) & 0x3), \ 7190309124Sdim 12 + (((M) >> 2) & 0x3), \ 7191309124Sdim 28 + (((M) >> 4) & 0x3), \ 7192309124Sdim 28 + (((M) >> 6) & 0x3)); }) 7193309124Sdim 7194309124Sdim#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ 7195309124Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 7196309124Sdim (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ 7197309124Sdim (__v16sf)(__m512)(W)); }) 7198309124Sdim 7199309124Sdim#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ 7200309124Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 7201309124Sdim (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ 7202309124Sdim (__v16sf)_mm512_setzero_ps()); }) 7203309124Sdim 7204309124Sdim#define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \ 7205309124Sdim (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 7206309124Sdim (__v2df)(__m128d)(B), \ 7207309124Sdim (__v2df)_mm_setzero_pd(), \ 7208309124Sdim (__mmask8)-1, (int)(R)); }) 7209309124Sdim 7210309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 7211309124Sdim_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 7212309124Sdim{ 7213309124Sdim return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, 7214309124Sdim (__v2df) __B, 7215309124Sdim (__v2df) __W, 7216309124Sdim (__mmask8) __U, 7217309124Sdim _MM_FROUND_CUR_DIRECTION); 7218309124Sdim} 7219309124Sdim 7220309124Sdim#define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \ 7221309124Sdim (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 7222309124Sdim (__v2df)(__m128d)(B), \ 7223309124Sdim (__v2df)(__m128d)(W), \ 7224309124Sdim (__mmask8)(U), (int)(R)); }) 7225309124Sdim 7226309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 7227309124Sdim_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) 7228309124Sdim{ 7229309124Sdim return (__m128d) 
__builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, 7230309124Sdim (__v2df) __B, 7231309124Sdim (__v2df) _mm_setzero_pd (), 7232309124Sdim (__mmask8) __U, 7233309124Sdim _MM_FROUND_CUR_DIRECTION); 7234309124Sdim} 7235309124Sdim 7236309124Sdim#define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \ 7237309124Sdim (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 7238309124Sdim (__v2df)(__m128d)(B), \ 7239309124Sdim (__v2df)_mm_setzero_pd(), \ 7240309124Sdim (__mmask8)(U), (int)(R)); }) 7241309124Sdim 7242309124Sdim#define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \ 7243309124Sdim (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 7244309124Sdim (__v4sf)(__m128)(B), \ 7245309124Sdim (__v4sf)_mm_setzero_ps(), \ 7246309124Sdim (__mmask8)-1, (int)(R)); }) 7247309124Sdim 7248309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 7249309124Sdim_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 7250309124Sdim{ 7251309124Sdim return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, 7252309124Sdim (__v4sf) __B, 7253309124Sdim (__v4sf) __W, 7254309124Sdim (__mmask8) __U, 7255309124Sdim _MM_FROUND_CUR_DIRECTION); 7256309124Sdim} 7257309124Sdim 7258309124Sdim#define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \ 7259309124Sdim (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 7260309124Sdim (__v4sf)(__m128)(B), \ 7261309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 7262309124Sdim (int)(R)); }) 7263309124Sdim 7264309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 7265309124Sdim_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) 7266309124Sdim{ 7267309124Sdim return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, 7268309124Sdim (__v4sf) __B, 7269309124Sdim (__v4sf) _mm_setzero_ps (), 7270309124Sdim (__mmask8) __U, 7271309124Sdim _MM_FROUND_CUR_DIRECTION); 7272309124Sdim} 7273309124Sdim 7274309124Sdim#define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \ 7275309124Sdim 
(__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 7276309124Sdim (__v4sf)(__m128)(B), \ 7277309124Sdim (__v4sf)_mm_setzero_ps(), \ 7278309124Sdim (__mmask8)(U), (int)(R)); }) 7279309124Sdim 7280309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 7281309124Sdim_mm512_broadcast_f32x4 (__m128 __A) 7282309124Sdim{ 7283309124Sdim return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, 7284309124Sdim (__v16sf) 7285309124Sdim _mm512_undefined_ps (), 7286309124Sdim (__mmask16) -1); 7287309124Sdim} 7288309124Sdim 7289309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 7290309124Sdim_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A) 7291309124Sdim{ 7292309124Sdim return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, 7293309124Sdim (__v16sf) __O, 7294309124Sdim __M); 7295309124Sdim} 7296309124Sdim 7297309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 7298309124Sdim_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A) 7299309124Sdim{ 7300309124Sdim return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, 7301309124Sdim (__v16sf) 7302309124Sdim _mm512_setzero_ps (), 7303309124Sdim __M); 7304309124Sdim} 7305309124Sdim 7306309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 7307309124Sdim_mm512_broadcast_f64x4 (__m256d __A) 7308309124Sdim{ 7309309124Sdim return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, 7310309124Sdim (__v8df) 7311309124Sdim _mm512_undefined_pd (), 7312309124Sdim (__mmask8) -1); 7313309124Sdim} 7314309124Sdim 7315309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 7316309124Sdim_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A) 7317309124Sdim{ 7318309124Sdim return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, 7319309124Sdim (__v8df) __O, 7320309124Sdim __M); 7321309124Sdim} 7322309124Sdim 7323309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 7324309124Sdim_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A) 7325309124Sdim{ 
7326309124Sdim return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, 7327309124Sdim (__v8df) 7328309124Sdim _mm512_setzero_pd (), 7329309124Sdim __M); 7330309124Sdim} 7331309124Sdim 7332309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7333309124Sdim_mm512_broadcast_i32x4 (__m128i __A) 7334309124Sdim{ 7335309124Sdim return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, 7336309124Sdim (__v16si) 7337309124Sdim _mm512_undefined_epi32 (), 7338309124Sdim (__mmask16) -1); 7339309124Sdim} 7340309124Sdim 7341309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7342309124Sdim_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A) 7343309124Sdim{ 7344309124Sdim return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, 7345309124Sdim (__v16si) __O, 7346309124Sdim __M); 7347309124Sdim} 7348309124Sdim 7349309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7350309124Sdim_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A) 7351309124Sdim{ 7352309124Sdim return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, 7353309124Sdim (__v16si) 7354309124Sdim _mm512_setzero_si512 (), 7355309124Sdim __M); 7356309124Sdim} 7357309124Sdim 7358309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7359309124Sdim_mm512_broadcast_i64x4 (__m256i __A) 7360309124Sdim{ 7361309124Sdim return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, 7362309124Sdim (__v8di) 7363309124Sdim _mm512_undefined_epi32 (), 7364309124Sdim (__mmask8) -1); 7365309124Sdim} 7366309124Sdim 7367309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7368309124Sdim_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A) 7369309124Sdim{ 7370309124Sdim return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, 7371309124Sdim (__v8di) __O, 7372309124Sdim __M); 7373309124Sdim} 7374309124Sdim 7375309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 7376309124Sdim_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A) 7377309124Sdim{ 
7378309124Sdim return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, 7379309124Sdim (__v8di) 7380309124Sdim _mm512_setzero_si512 (), 7381309124Sdim __M); 7382309124Sdim} 7383309124Sdim 7384309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 7385309124Sdim_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) 7386309124Sdim{ 7387309124Sdim return (__m512d)__builtin_ia32_selectpd_512(__M, 7388309124Sdim (__v8df) _mm512_broadcastsd_pd(__A), 7389309124Sdim (__v8df) __O); 7390309124Sdim} 7391309124Sdim 7392309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 7393309124Sdim_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 7394309124Sdim{ 7395309124Sdim return (__m512d)__builtin_ia32_selectpd_512(__M, 7396309124Sdim (__v8df) _mm512_broadcastsd_pd(__A), 7397309124Sdim (__v8df) _mm512_setzero_pd()); 7398309124Sdim} 7399309124Sdim 7400309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 7401309124Sdim_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) 7402309124Sdim{ 7403309124Sdim return (__m512)__builtin_ia32_selectps_512(__M, 7404309124Sdim (__v16sf) _mm512_broadcastss_ps(__A), 7405309124Sdim (__v16sf) __O); 7406309124Sdim} 7407309124Sdim 7408309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 7409309124Sdim_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) 7410309124Sdim{ 7411309124Sdim return (__m512)__builtin_ia32_selectps_512(__M, 7412309124Sdim (__v16sf) _mm512_broadcastss_ps(__A), 7413309124Sdim (__v16sf) _mm512_setzero_ps()); 7414309124Sdim} 7415309124Sdim 7416309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7417309124Sdim_mm512_cvtsepi32_epi8 (__m512i __A) 7418309124Sdim{ 7419309124Sdim return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 7420309124Sdim (__v16qi) _mm_undefined_si128 (), 7421309124Sdim (__mmask16) -1); 7422309124Sdim} 7423309124Sdim 7424309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7425309124Sdim_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 
7426309124Sdim{ 7427309124Sdim return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 7428309124Sdim (__v16qi) __O, __M); 7429309124Sdim} 7430309124Sdim 7431309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7432309124Sdim_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A) 7433309124Sdim{ 7434309124Sdim return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 7435309124Sdim (__v16qi) _mm_setzero_si128 (), 7436309124Sdim __M); 7437309124Sdim} 7438309124Sdim 7439309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7440309124Sdim_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 7441309124Sdim{ 7442309124Sdim __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 7443309124Sdim} 7444309124Sdim 7445309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7446309124Sdim_mm512_cvtsepi32_epi16 (__m512i __A) 7447309124Sdim{ 7448309124Sdim return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 7449309124Sdim (__v16hi) _mm256_undefined_si256 (), 7450309124Sdim (__mmask16) -1); 7451309124Sdim} 7452309124Sdim 7453309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7454309124Sdim_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 7455309124Sdim{ 7456309124Sdim return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 7457309124Sdim (__v16hi) __O, __M); 7458309124Sdim} 7459309124Sdim 7460309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7461309124Sdim_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A) 7462309124Sdim{ 7463309124Sdim return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 7464309124Sdim (__v16hi) _mm256_setzero_si256 (), 7465309124Sdim __M); 7466309124Sdim} 7467309124Sdim 7468309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7469309124Sdim_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 7470309124Sdim{ 7471309124Sdim __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 7472309124Sdim} 7473309124Sdim 
7474309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7475309124Sdim_mm512_cvtsepi64_epi8 (__m512i __A) 7476309124Sdim{ 7477309124Sdim return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 7478309124Sdim (__v16qi) _mm_undefined_si128 (), 7479309124Sdim (__mmask8) -1); 7480309124Sdim} 7481309124Sdim 7482309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7483309124Sdim_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 7484309124Sdim{ 7485309124Sdim return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 7486309124Sdim (__v16qi) __O, __M); 7487309124Sdim} 7488309124Sdim 7489309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7490309124Sdim_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A) 7491309124Sdim{ 7492309124Sdim return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 7493309124Sdim (__v16qi) _mm_setzero_si128 (), 7494309124Sdim __M); 7495309124Sdim} 7496309124Sdim 7497309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7498309124Sdim_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 7499309124Sdim{ 7500309124Sdim __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 7501309124Sdim} 7502309124Sdim 7503309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7504309124Sdim_mm512_cvtsepi64_epi32 (__m512i __A) 7505309124Sdim{ 7506309124Sdim return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 7507309124Sdim (__v8si) _mm256_undefined_si256 (), 7508309124Sdim (__mmask8) -1); 7509309124Sdim} 7510309124Sdim 7511309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7512309124Sdim_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 7513309124Sdim{ 7514309124Sdim return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 7515309124Sdim (__v8si) __O, __M); 7516309124Sdim} 7517309124Sdim 7518309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7519309124Sdim_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A) 7520309124Sdim{ 7521309124Sdim 
return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 7522309124Sdim (__v8si) _mm256_setzero_si256 (), 7523309124Sdim __M); 7524309124Sdim} 7525309124Sdim 7526309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7527309124Sdim_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A) 7528309124Sdim{ 7529309124Sdim __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 7530309124Sdim} 7531309124Sdim 7532309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7533309124Sdim_mm512_cvtsepi64_epi16 (__m512i __A) 7534309124Sdim{ 7535309124Sdim return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 7536309124Sdim (__v8hi) _mm_undefined_si128 (), 7537309124Sdim (__mmask8) -1); 7538309124Sdim} 7539309124Sdim 7540309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7541309124Sdim_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 7542309124Sdim{ 7543309124Sdim return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 7544309124Sdim (__v8hi) __O, __M); 7545309124Sdim} 7546309124Sdim 7547309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7548309124Sdim_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A) 7549309124Sdim{ 7550309124Sdim return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 7551309124Sdim (__v8hi) _mm_setzero_si128 (), 7552309124Sdim __M); 7553309124Sdim} 7554309124Sdim 7555309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7556309124Sdim_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A) 7557309124Sdim{ 7558309124Sdim __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); 7559309124Sdim} 7560309124Sdim 7561309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7562309124Sdim_mm512_cvtusepi32_epi8 (__m512i __A) 7563309124Sdim{ 7564309124Sdim return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 7565309124Sdim (__v16qi) _mm_undefined_si128 (), 7566309124Sdim (__mmask16) -1); 7567309124Sdim} 7568309124Sdim 7569309124Sdimstatic 
__inline__ __m128i __DEFAULT_FN_ATTRS 7570309124Sdim_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 7571309124Sdim{ 7572309124Sdim return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 7573309124Sdim (__v16qi) __O, 7574309124Sdim __M); 7575309124Sdim} 7576309124Sdim 7577309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7578309124Sdim_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) 7579309124Sdim{ 7580309124Sdim return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 7581309124Sdim (__v16qi) _mm_setzero_si128 (), 7582309124Sdim __M); 7583309124Sdim} 7584309124Sdim 7585309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7586309124Sdim_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 7587309124Sdim{ 7588309124Sdim __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 7589309124Sdim} 7590309124Sdim 7591309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7592309124Sdim_mm512_cvtusepi32_epi16 (__m512i __A) 7593309124Sdim{ 7594309124Sdim return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 7595309124Sdim (__v16hi) _mm256_undefined_si256 (), 7596309124Sdim (__mmask16) -1); 7597309124Sdim} 7598309124Sdim 7599309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7600309124Sdim_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 7601309124Sdim{ 7602309124Sdim return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 7603309124Sdim (__v16hi) __O, 7604309124Sdim __M); 7605309124Sdim} 7606309124Sdim 7607309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7608309124Sdim_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) 7609309124Sdim{ 7610309124Sdim return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 7611309124Sdim (__v16hi) _mm256_setzero_si256 (), 7612309124Sdim __M); 7613309124Sdim} 7614309124Sdim 7615309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7616309124Sdim_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, 
__mmask16 __M, __m512i __A) 7617309124Sdim{ 7618309124Sdim __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 7619309124Sdim} 7620309124Sdim 7621309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7622309124Sdim_mm512_cvtusepi64_epi8 (__m512i __A) 7623309124Sdim{ 7624309124Sdim return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 7625309124Sdim (__v16qi) _mm_undefined_si128 (), 7626309124Sdim (__mmask8) -1); 7627309124Sdim} 7628309124Sdim 7629309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7630309124Sdim_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 7631309124Sdim{ 7632309124Sdim return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 7633309124Sdim (__v16qi) __O, 7634309124Sdim __M); 7635309124Sdim} 7636309124Sdim 7637309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7638309124Sdim_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A) 7639309124Sdim{ 7640309124Sdim return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 7641309124Sdim (__v16qi) _mm_setzero_si128 (), 7642309124Sdim __M); 7643309124Sdim} 7644309124Sdim 7645309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7646309124Sdim_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 7647309124Sdim{ 7648309124Sdim __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 7649309124Sdim} 7650309124Sdim 7651309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7652309124Sdim_mm512_cvtusepi64_epi32 (__m512i __A) 7653309124Sdim{ 7654309124Sdim return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 7655309124Sdim (__v8si) _mm256_undefined_si256 (), 7656309124Sdim (__mmask8) -1); 7657309124Sdim} 7658309124Sdim 7659309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7660309124Sdim_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 7661309124Sdim{ 7662309124Sdim return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 7663309124Sdim (__v8si) __O, __M); 
7664309124Sdim} 7665309124Sdim 7666309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7667309124Sdim_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A) 7668309124Sdim{ 7669309124Sdim return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 7670309124Sdim (__v8si) _mm256_setzero_si256 (), 7671309124Sdim __M); 7672309124Sdim} 7673309124Sdim 7674309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7675309124Sdim_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) 7676309124Sdim{ 7677309124Sdim __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M); 7678309124Sdim} 7679309124Sdim 7680309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7681309124Sdim_mm512_cvtusepi64_epi16 (__m512i __A) 7682309124Sdim{ 7683309124Sdim return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 7684309124Sdim (__v8hi) _mm_undefined_si128 (), 7685309124Sdim (__mmask8) -1); 7686309124Sdim} 7687309124Sdim 7688309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7689309124Sdim_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 7690309124Sdim{ 7691309124Sdim return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 7692309124Sdim (__v8hi) __O, __M); 7693309124Sdim} 7694309124Sdim 7695309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7696309124Sdim_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A) 7697309124Sdim{ 7698309124Sdim return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 7699309124Sdim (__v8hi) _mm_setzero_si128 (), 7700309124Sdim __M); 7701309124Sdim} 7702309124Sdim 7703309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7704309124Sdim_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) 7705309124Sdim{ 7706309124Sdim __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M); 7707309124Sdim} 7708309124Sdim 7709309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7710309124Sdim_mm512_cvtepi32_epi8 (__m512i __A) 7711309124Sdim{ 7712309124Sdim 
return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 7713309124Sdim (__v16qi) _mm_undefined_si128 (), 7714309124Sdim (__mmask16) -1); 7715309124Sdim} 7716309124Sdim 7717309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7718309124Sdim_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 7719309124Sdim{ 7720309124Sdim return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 7721309124Sdim (__v16qi) __O, __M); 7722309124Sdim} 7723309124Sdim 7724309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7725309124Sdim_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) 7726309124Sdim{ 7727309124Sdim return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 7728309124Sdim (__v16qi) _mm_setzero_si128 (), 7729309124Sdim __M); 7730309124Sdim} 7731309124Sdim 7732309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7733309124Sdim_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 7734309124Sdim{ 7735309124Sdim __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 7736309124Sdim} 7737309124Sdim 7738309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7739309124Sdim_mm512_cvtepi32_epi16 (__m512i __A) 7740309124Sdim{ 7741309124Sdim return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 7742309124Sdim (__v16hi) _mm256_undefined_si256 (), 7743309124Sdim (__mmask16) -1); 7744309124Sdim} 7745309124Sdim 7746309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7747309124Sdim_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 7748309124Sdim{ 7749309124Sdim return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 7750309124Sdim (__v16hi) __O, __M); 7751309124Sdim} 7752309124Sdim 7753309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7754309124Sdim_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A) 7755309124Sdim{ 7756309124Sdim return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 7757309124Sdim (__v16hi) _mm256_setzero_si256 (), 7758309124Sdim __M); 
7759309124Sdim} 7760309124Sdim 7761309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7762309124Sdim_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A) 7763309124Sdim{ 7764309124Sdim __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M); 7765309124Sdim} 7766309124Sdim 7767309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7768309124Sdim_mm512_cvtepi64_epi8 (__m512i __A) 7769309124Sdim{ 7770309124Sdim return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 7771309124Sdim (__v16qi) _mm_undefined_si128 (), 7772309124Sdim (__mmask8) -1); 7773309124Sdim} 7774309124Sdim 7775309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7776309124Sdim_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 7777309124Sdim{ 7778309124Sdim return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 7779309124Sdim (__v16qi) __O, __M); 7780309124Sdim} 7781309124Sdim 7782309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7783309124Sdim_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A) 7784309124Sdim{ 7785309124Sdim return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 7786309124Sdim (__v16qi) _mm_setzero_si128 (), 7787309124Sdim __M); 7788309124Sdim} 7789309124Sdim 7790309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7791309124Sdim_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 7792309124Sdim{ 7793309124Sdim __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 7794309124Sdim} 7795309124Sdim 7796309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7797309124Sdim_mm512_cvtepi64_epi32 (__m512i __A) 7798309124Sdim{ 7799309124Sdim return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 7800309124Sdim (__v8si) _mm256_undefined_si256 (), 7801309124Sdim (__mmask8) -1); 7802309124Sdim} 7803309124Sdim 7804309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7805309124Sdim_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 7806309124Sdim{ 
7807309124Sdim return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 7808309124Sdim (__v8si) __O, __M); 7809309124Sdim} 7810309124Sdim 7811309124Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 7812309124Sdim_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A) 7813309124Sdim{ 7814309124Sdim return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 7815309124Sdim (__v8si) _mm256_setzero_si256 (), 7816309124Sdim __M); 7817309124Sdim} 7818309124Sdim 7819309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7820309124Sdim_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) 7821309124Sdim{ 7822309124Sdim __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 7823309124Sdim} 7824309124Sdim 7825309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7826309124Sdim_mm512_cvtepi64_epi16 (__m512i __A) 7827309124Sdim{ 7828309124Sdim return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 7829309124Sdim (__v8hi) _mm_undefined_si128 (), 7830309124Sdim (__mmask8) -1); 7831309124Sdim} 7832309124Sdim 7833309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7834309124Sdim_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 7835309124Sdim{ 7836309124Sdim return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 7837309124Sdim (__v8hi) __O, __M); 7838309124Sdim} 7839309124Sdim 7840309124Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 7841309124Sdim_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A) 7842309124Sdim{ 7843309124Sdim return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 7844309124Sdim (__v8hi) _mm_setzero_si128 (), 7845309124Sdim __M); 7846309124Sdim} 7847309124Sdim 7848309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 7849309124Sdim_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) 7850309124Sdim{ 7851309124Sdim __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); 7852309124Sdim} 7853309124Sdim 7854314564Sdim#define _mm512_extracti32x4_epi32(A, 
imm) __extension__ ({ \ 7855314564Sdim (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A), \ 7856314564Sdim (__v16si)_mm512_undefined_epi32(), \ 7857314564Sdim 0 + ((imm) & 0x3) * 4, \ 7858314564Sdim 1 + ((imm) & 0x3) * 4, \ 7859314564Sdim 2 + ((imm) & 0x3) * 4, \ 7860314564Sdim 3 + ((imm) & 0x3) * 4); }) 7861309124Sdim 7862309124Sdim#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \ 7863314564Sdim (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, \ 7864314564Sdim (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \ 7865314564Sdim (__v4si)__W); }) 7866309124Sdim 7867309124Sdim#define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \ 7868314564Sdim (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, \ 7869314564Sdim (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \ 7870314564Sdim (__v4si)_mm_setzero_si128()); }) 7871309124Sdim 7872314564Sdim#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \ 7873314564Sdim (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A), \ 7874314564Sdim (__v8di)_mm512_undefined_epi32(), \ 7875314564Sdim ((imm) & 1) ? 4 : 0, \ 7876314564Sdim ((imm) & 1) ? 5 : 1, \ 7877314564Sdim ((imm) & 1) ? 6 : 2, \ 7878314564Sdim ((imm) & 1) ? 
7 : 3); })

/* Extracts a 256-bit half with _mm512_extracti64x4_epi64, then runs
   __builtin_ia32_selectq_256 on mask U with the extracted lanes and W.
   The expansion must use the macro parameters (U) and (W); the earlier
   spelling __U / __W bound to unrelated identifiers at the call site.  */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                                (__v4di)(__m256i)(W)); })

/* Same select, with a zero vector in place of W.  */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                                (__v4di)_mm256_setzero_si256()); })

/* Shuffle of A (indices 0..7) with B widened to 512 bits (indices 8..11
   are B's four lanes).  Bit 0 of imm clear: B lands in result lanes 0..3;
   bit 0 set: B lands in result lanes 4..7.  Remaining lanes come from A.  */
#define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                 (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \
                                 ((imm) & 0x1) ?  0 :  8, \
                                 ((imm) & 0x1) ?  1 :  9, \
                                 ((imm) & 0x1) ?  2 : 10, \
                                 ((imm) & 0x1) ?  3 : 11, \
                                 ((imm) & 0x1) ?  8 :  4, \
                                 ((imm) & 0x1) ?  9 :  5, \
                                 ((imm) & 0x1) ? 10 :  6, \
                                 ((imm) & 0x1) ? 11 :  7); })

/* Insert, then __builtin_ia32_selectpd_512 on mask U against W.  */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)(W)); })

/* Insert, then __builtin_ia32_selectpd_512 on mask U against zero.  */
#define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)_mm512_setzero_pd()); })

/* Integer counterpart of _mm512_insertf64x4: identical index table over
   __v8di operands.  */
#define _mm512_inserti64x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                 (__v8di)_mm512_castsi256_si512((__m256i)(B)), \
                                 ((imm) & 0x1) ?  0 :  8, \
                                 ((imm) & 0x1) ?  1 :  9, \
                                 ((imm) & 0x1) ? \
2 : 10, \ 7918314564Sdim ((imm) & 0x1) ? 3 : 11, \ 7919314564Sdim ((imm) & 0x1) ? 8 : 4, \ 7920314564Sdim ((imm) & 0x1) ? 9 : 5, \ 7921314564Sdim ((imm) & 0x1) ? 10 : 6, \ 7922314564Sdim ((imm) & 0x1) ? 11 : 7); }) 7923309124Sdim 7924309124Sdim#define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \ 7925314564Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 7926314564Sdim (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ 7927314564Sdim (__v8di)(W)); }) 7928309124Sdim 7929309124Sdim#define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \ 7930314564Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 7931314564Sdim (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ 7932314564Sdim (__v8di)_mm512_setzero_si512()); }) 7933309124Sdim 7934309124Sdim#define _mm512_insertf32x4(A, B, imm) __extension__ ({ \ 7935314564Sdim (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \ 7936314564Sdim (__v16sf)_mm512_castps128_ps512((__m128)(B)),\ 7937314564Sdim (((imm) & 0x3) == 0) ? 16 : 0, \ 7938314564Sdim (((imm) & 0x3) == 0) ? 17 : 1, \ 7939314564Sdim (((imm) & 0x3) == 0) ? 18 : 2, \ 7940314564Sdim (((imm) & 0x3) == 0) ? 19 : 3, \ 7941314564Sdim (((imm) & 0x3) == 1) ? 16 : 4, \ 7942314564Sdim (((imm) & 0x3) == 1) ? 17 : 5, \ 7943314564Sdim (((imm) & 0x3) == 1) ? 18 : 6, \ 7944314564Sdim (((imm) & 0x3) == 1) ? 19 : 7, \ 7945314564Sdim (((imm) & 0x3) == 2) ? 16 : 8, \ 7946314564Sdim (((imm) & 0x3) == 2) ? 17 : 9, \ 7947314564Sdim (((imm) & 0x3) == 2) ? 18 : 10, \ 7948314564Sdim (((imm) & 0x3) == 2) ? 19 : 11, \ 7949314564Sdim (((imm) & 0x3) == 3) ? 16 : 12, \ 7950314564Sdim (((imm) & 0x3) == 3) ? 17 : 13, \ 7951314564Sdim (((imm) & 0x3) == 3) ? 18 : 14, \ 7952314564Sdim (((imm) & 0x3) == 3) ? 
19 : 15); })

/* Insert, then __builtin_ia32_selectps_512 on mask U against W.  */
#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)(W)); })

/* Insert, then __builtin_ia32_selectps_512 on mask U against zero.  */
#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps()); })

/* Integer counterpart of _mm512_insertf32x4: the group of four result
   lanes numbered (imm & 3) takes B (shuffle indices 16..19), the rest
   keep A's lanes.  */
#define _mm512_inserti32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                 (__v16si)_mm512_castsi128_si512((__m128i)(B)),\
                                 (((imm) & 0x3) == 0) ? 16 :  0, \
                                 (((imm) & 0x3) == 0) ? 17 :  1, \
                                 (((imm) & 0x3) == 0) ? 18 :  2, \
                                 (((imm) & 0x3) == 0) ? 19 :  3, \
                                 (((imm) & 0x3) == 1) ? 16 :  4, \
                                 (((imm) & 0x3) == 1) ? 17 :  5, \
                                 (((imm) & 0x3) == 1) ? 18 :  6, \
                                 (((imm) & 0x3) == 1) ? 19 :  7, \
                                 (((imm) & 0x3) == 2) ? 16 :  8, \
                                 (((imm) & 0x3) == 2) ? 17 :  9, \
                                 (((imm) & 0x3) == 2) ? 18 : 10, \
                                 (((imm) & 0x3) == 2) ? 19 : 11, \
                                 (((imm) & 0x3) == 3) ? 16 : 12, \
                                 (((imm) & 0x3) == 3) ? 17 : 13, \
                                 (((imm) & 0x3) == 3) ? 18 : 14, \
                                 (((imm) & 0x3) == 3) ? \
19 : 15); })

/* Insert, then __builtin_ia32_selectd_512 on mask U against W.  */
#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)(W)); })

/* Insert, then __builtin_ia32_selectd_512 on mask U against zero.  */
#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()); })

/* getmant (double) wrappers over __builtin_ia32_getmantpd512_mask.  The
   builtin's immediate is assembled as (C << 2) | B.  The "_round" forms
   forward R as the final argument; the others pass
   _MM_FROUND_CUR_DIRECTION.  Second vector operand / mask per form:
     plain  - undefined (round form) or zero (non-round form), mask -1
     mask   - W and U
     maskz  - zero and U  */
#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_getmant_pd(A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
(int)(((C)<<2) | (B)), \ 8022309124Sdim (__v8df)(__m512d)(W), \ 8023309124Sdim (__mmask8)(U), \ 8024309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8025309124Sdim 8026309124Sdim#define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \ 8027309124Sdim (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 8028309124Sdim (int)(((C)<<2) | (B)), \ 8029309124Sdim (__v8df)_mm512_setzero_pd(), \ 8030309124Sdim (__mmask8)(U), \ 8031309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8032309124Sdim 8033309124Sdim#define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \ 8034309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 8035309124Sdim (int)(((C)<<2) | (B)), \ 8036309124Sdim (__v16sf)_mm512_undefined_ps(), \ 8037309124Sdim (__mmask16)-1, (int)(R)); }) 8038309124Sdim 8039309124Sdim#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \ 8040309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 8041309124Sdim (int)(((C)<<2) | (B)), \ 8042309124Sdim (__v16sf)(__m512)(W), \ 8043309124Sdim (__mmask16)(U), (int)(R)); }) 8044309124Sdim 8045309124Sdim#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \ 8046309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 8047309124Sdim (int)(((C)<<2) | (B)), \ 8048309124Sdim (__v16sf)_mm512_setzero_ps(), \ 8049309124Sdim (__mmask16)(U), (int)(R)); }) 8050309124Sdim 8051309124Sdim#define _mm512_getmant_ps(A, B, C) __extension__ ({ \ 8052309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 8053309124Sdim (int)(((C)<<2)|(B)), \ 8054309124Sdim (__v16sf)_mm512_undefined_ps(), \ 8055309124Sdim (__mmask16)-1, \ 8056309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8057309124Sdim 8058309124Sdim#define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ 8059309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 8060309124Sdim (int)(((C)<<2)|(B)), \ 8061309124Sdim (__v16sf)(__m512)(W), \ 8062309124Sdim 
(__mmask16)(U), \ 8063309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8064309124Sdim 8065309124Sdim#define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ 8066309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 8067309124Sdim (int)(((C)<<2)|(B)), \ 8068309124Sdim (__v16sf)_mm512_setzero_ps(), \ 8069309124Sdim (__mmask16)(U), \ 8070309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8071309124Sdim 8072309124Sdim#define _mm512_getexp_round_pd(A, R) __extension__ ({ \ 8073309124Sdim (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 8074309124Sdim (__v8df)_mm512_undefined_pd(), \ 8075309124Sdim (__mmask8)-1, (int)(R)); }) 8076309124Sdim 8077309124Sdim#define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \ 8078309124Sdim (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 8079309124Sdim (__v8df)(__m512d)(W), \ 8080309124Sdim (__mmask8)(U), (int)(R)); }) 8081309124Sdim 8082309124Sdim#define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \ 8083309124Sdim (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 8084309124Sdim (__v8df)_mm512_setzero_pd(), \ 8085309124Sdim (__mmask8)(U), (int)(R)); }) 8086309124Sdim 8087309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 8088309124Sdim_mm512_getexp_pd (__m512d __A) 8089309124Sdim{ 8090309124Sdim return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8091309124Sdim (__v8df) _mm512_undefined_pd (), 8092309124Sdim (__mmask8) -1, 8093309124Sdim _MM_FROUND_CUR_DIRECTION); 8094309124Sdim} 8095309124Sdim 8096309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 8097309124Sdim_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A) 8098309124Sdim{ 8099309124Sdim return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8100309124Sdim (__v8df) __W, 8101309124Sdim (__mmask8) __U, 8102309124Sdim _MM_FROUND_CUR_DIRECTION); 8103309124Sdim} 8104309124Sdim 8105309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 
8106309124Sdim_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) 8107309124Sdim{ 8108309124Sdim return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8109309124Sdim (__v8df) _mm512_setzero_pd (), 8110309124Sdim (__mmask8) __U, 8111309124Sdim _MM_FROUND_CUR_DIRECTION); 8112309124Sdim} 8113309124Sdim 8114309124Sdim#define _mm512_getexp_round_ps(A, R) __extension__ ({ \ 8115309124Sdim (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8116309124Sdim (__v16sf)_mm512_undefined_ps(), \ 8117309124Sdim (__mmask16)-1, (int)(R)); }) 8118309124Sdim 8119309124Sdim#define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \ 8120309124Sdim (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8121309124Sdim (__v16sf)(__m512)(W), \ 8122309124Sdim (__mmask16)(U), (int)(R)); }) 8123309124Sdim 8124309124Sdim#define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \ 8125309124Sdim (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8126309124Sdim (__v16sf)_mm512_setzero_ps(), \ 8127309124Sdim (__mmask16)(U), (int)(R)); }) 8128309124Sdim 8129309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 8130309124Sdim_mm512_getexp_ps (__m512 __A) 8131309124Sdim{ 8132309124Sdim return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8133309124Sdim (__v16sf) _mm512_undefined_ps (), 8134309124Sdim (__mmask16) -1, 8135309124Sdim _MM_FROUND_CUR_DIRECTION); 8136309124Sdim} 8137309124Sdim 8138309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 8139309124Sdim_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A) 8140309124Sdim{ 8141309124Sdim return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8142309124Sdim (__v16sf) __W, 8143309124Sdim (__mmask16) __U, 8144309124Sdim _MM_FROUND_CUR_DIRECTION); 8145309124Sdim} 8146309124Sdim 8147309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 8148309124Sdim_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) 8149309124Sdim{ 8150309124Sdim return (__m512) 
__builtin_ia32_getexpps512_mask ((__v16sf) __A, 8151309124Sdim (__v16sf) _mm512_setzero_ps (), 8152309124Sdim (__mmask16) __U, 8153309124Sdim _MM_FROUND_CUR_DIRECTION); 8154309124Sdim} 8155309124Sdim 8156309124Sdim#define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \ 8157309124Sdim (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \ 8158309124Sdim (float const *)(addr), \ 8159309124Sdim (__v8di)(__m512i)(index), (__mmask8)-1, \ 8160309124Sdim (int)(scale)); }) 8161309124Sdim 8162309124Sdim#define _mm512_mask_i64gather_ps( __v1_old, __mask, __index,\ 8163309124Sdim __addr, __scale) __extension__({\ 8164309124Sdim__builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,\ 8165309124Sdim __addr,(__v8di) __index, __mask, __scale);\ 8166309124Sdim}) 8167309124Sdim 8168309124Sdim#define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\ 8169309124Sdim (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_ps(), \ 8170309124Sdim (int const *)(addr), \ 8171309124Sdim (__v8di)(__m512i)(index), \ 8172309124Sdim (__mmask8)-1, (int)(scale)); }) 8173309124Sdim 8174309124Sdim#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8175309124Sdim (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \ 8176309124Sdim (int const *)(addr), \ 8177309124Sdim (__v8di)(__m512i)(index), \ 8178309124Sdim (__mmask8)(mask), (int)(scale)); }) 8179309124Sdim 8180309124Sdim#define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\ 8181309124Sdim (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \ 8182309124Sdim (double const *)(addr), \ 8183309124Sdim (__v8di)(__m512i)(index), (__mmask8)-1, \ 8184309124Sdim (int)(scale)); }) 8185309124Sdim 8186309124Sdim#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8187309124Sdim (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \ 8188309124Sdim (double const *)(addr), \ 8189309124Sdim 
(__v8di)(__m512i)(index), \ 8190309124Sdim (__mmask8)(mask), (int)(scale)); }) 8191309124Sdim 8192309124Sdim#define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\ 8193309124Sdim (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_pd(), \ 8194309124Sdim (long long const *)(addr), \ 8195309124Sdim (__v8di)(__m512i)(index), (__mmask8)-1, \ 8196309124Sdim (int)(scale)); }) 8197309124Sdim 8198309124Sdim#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8199309124Sdim (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \ 8200309124Sdim (long long const *)(addr), \ 8201309124Sdim (__v8di)(__m512i)(index), \ 8202309124Sdim (__mmask8)(mask), (int)(scale)); }) 8203309124Sdim 8204309124Sdim#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\ 8205309124Sdim (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \ 8206309124Sdim (float const *)(addr), \ 8207309124Sdim (__v16sf)(__m512)(index), \ 8208309124Sdim (__mmask16)-1, (int)(scale)); }) 8209309124Sdim 8210309124Sdim#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8211309124Sdim (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \ 8212309124Sdim (float const *)(addr), \ 8213309124Sdim (__v16sf)(__m512)(index), \ 8214309124Sdim (__mmask16)(mask), (int)(scale)); }) 8215309124Sdim 8216309124Sdim#define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\ 8217309124Sdim (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \ 8218309124Sdim (int const *)(addr), \ 8219309124Sdim (__v16si)(__m512i)(index), \ 8220309124Sdim (__mmask16)-1, (int)(scale)); }) 8221309124Sdim 8222309124Sdim#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8223309124Sdim (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \ 8224309124Sdim (int const *)(addr), \ 8225309124Sdim (__v16si)(__m512i)(index), \ 8226309124Sdim 
(__mmask16)(mask), (int)(scale)); }) 8227309124Sdim 8228309124Sdim#define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\ 8229309124Sdim (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \ 8230309124Sdim (double const *)(addr), \ 8231309124Sdim (__v8si)(__m256i)(index), (__mmask8)-1, \ 8232309124Sdim (int)(scale)); }) 8233309124Sdim 8234309124Sdim#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8235309124Sdim (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \ 8236309124Sdim (double const *)(addr), \ 8237309124Sdim (__v8si)(__m256i)(index), \ 8238309124Sdim (__mmask8)(mask), (int)(scale)); }) 8239309124Sdim 8240309124Sdim#define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\ 8241309124Sdim (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \ 8242309124Sdim (long long const *)(addr), \ 8243309124Sdim (__v8si)(__m256i)(index), (__mmask8)-1, \ 8244309124Sdim (int)(scale)); }) 8245309124Sdim 8246309124Sdim#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8247309124Sdim (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \ 8248309124Sdim (long long const *)(addr), \ 8249309124Sdim (__v8si)(__m256i)(index), \ 8250309124Sdim (__mmask8)(mask), (int)(scale)); }) 8251309124Sdim 8252309124Sdim#define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\ 8253309124Sdim __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \ 8254309124Sdim (__v8di)(__m512i)(index), \ 8255309124Sdim (__v8sf)(__m256)(v1), (int)(scale)); }) 8256309124Sdim 8257309124Sdim#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\ 8258309124Sdim __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \ 8259309124Sdim (__v8di)(__m512i)(index), \ 8260309124Sdim (__v8sf)(__m256)(v1), (int)(scale)); }) 8261309124Sdim 8262309124Sdim#define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ 
({\
  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale)); })

/* Scatter macros with a 512-bit (__v8di) index vector.  Argument order
   handed to the builtin: base address, mask (-1 in the plain form),
   index, value vector v1, scale.  */
#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale)); })

#define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })

/* 32-bit-index scatters use a __v16si index and a 16-bit mask.  */
#define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
__extension__ ({\
  __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale)); })

/* 32-bit-index scatter macros.  Argument order handed to the builtin:
   base address, mask (-1 in the plain form), index, value vector v1,
   scale.  The 16-lane forms take a __v16si index and a 16-bit mask; the
   8-lane forms take a __v8si index and an 8-bit mask.  */
#define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale)); })

#define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })

static __inline__ __m128
__DEFAULT_FN_ATTRS 8333309124Sdim_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8334309124Sdim{ 8335314564Sdim return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 8336314564Sdim (__v4sf) __A, 8337309124Sdim (__v4sf) __B, 8338309124Sdim (__mmask8) __U, 8339309124Sdim _MM_FROUND_CUR_DIRECTION); 8340309124Sdim} 8341309124Sdim 8342309124Sdim#define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\ 8343314564Sdim (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 8344314564Sdim (__v4sf)(__m128)(A), \ 8345314564Sdim (__v4sf)(__m128)(B), (__mmask8)(U), \ 8346309124Sdim (int)(R)); }) 8347309124Sdim 8348309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8349309124Sdim_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 8350309124Sdim{ 8351309124Sdim return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A, 8352309124Sdim (__v4sf) __B, 8353309124Sdim (__v4sf) __C, 8354309124Sdim (__mmask8) __U, 8355309124Sdim _MM_FROUND_CUR_DIRECTION); 8356309124Sdim} 8357309124Sdim 8358309124Sdim#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\ 8359309124Sdim (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 8360309124Sdim (__v4sf)(__m128)(B), \ 8361309124Sdim (__v4sf)(__m128)(C), (__mmask8)(U), \ 8362309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8363309124Sdim 8364309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8365309124Sdim_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 8366309124Sdim{ 8367309124Sdim return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, 8368309124Sdim (__v4sf) __X, 8369309124Sdim (__v4sf) __Y, 8370309124Sdim (__mmask8) __U, 8371309124Sdim _MM_FROUND_CUR_DIRECTION); 8372309124Sdim} 8373309124Sdim 8374309124Sdim#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\ 8375309124Sdim (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ 8376309124Sdim (__v4sf)(__m128)(X), \ 8377309124Sdim (__v4sf)(__m128)(Y), (__mmask8)(U), \ 
8378309124Sdim (int)(R)); }) 8379309124Sdim 8380309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8381309124Sdim_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8382309124Sdim{ 8383314564Sdim return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 8384314564Sdim (__v4sf) __A, 8385309124Sdim -(__v4sf) __B, 8386309124Sdim (__mmask8) __U, 8387309124Sdim _MM_FROUND_CUR_DIRECTION); 8388309124Sdim} 8389309124Sdim 8390309124Sdim#define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\ 8391314564Sdim (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 8392314564Sdim (__v4sf)(__m128)(A), \ 8393314564Sdim (__v4sf)(__m128)(B), (__mmask8)(U), \ 8394309124Sdim (int)(R)); }) 8395309124Sdim 8396309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8397309124Sdim_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 8398309124Sdim{ 8399309124Sdim return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A, 8400309124Sdim (__v4sf) __B, 8401309124Sdim -(__v4sf) __C, 8402309124Sdim (__mmask8) __U, 8403309124Sdim _MM_FROUND_CUR_DIRECTION); 8404309124Sdim} 8405309124Sdim 8406309124Sdim#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\ 8407309124Sdim (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 8408309124Sdim (__v4sf)(__m128)(B), \ 8409309124Sdim -(__v4sf)(__m128)(C), (__mmask8)(U), \ 8410309124Sdim (int)(R)); }) 8411309124Sdim 8412309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8413309124Sdim_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 8414309124Sdim{ 8415314564Sdim return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W, 8416309124Sdim (__v4sf) __X, 8417314564Sdim (__v4sf) __Y, 8418309124Sdim (__mmask8) __U, 8419309124Sdim _MM_FROUND_CUR_DIRECTION); 8420309124Sdim} 8421309124Sdim 8422309124Sdim#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\ 8423314564Sdim (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ 8424309124Sdim 
(__v4sf)(__m128)(X), \ 8425314564Sdim (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8426309124Sdim (int)(R)); }) 8427309124Sdim 8428309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8429309124Sdim_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8430309124Sdim{ 8431314564Sdim return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 8432314564Sdim -(__v4sf) __A, 8433309124Sdim (__v4sf) __B, 8434309124Sdim (__mmask8) __U, 8435309124Sdim _MM_FROUND_CUR_DIRECTION); 8436309124Sdim} 8437309124Sdim 8438309124Sdim#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\ 8439314564Sdim (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 8440314564Sdim -(__v4sf)(__m128)(A), \ 8441314564Sdim (__v4sf)(__m128)(B), (__mmask8)(U), \ 8442309124Sdim (int)(R)); }) 8443309124Sdim 8444309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8445309124Sdim_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 8446309124Sdim{ 8447309124Sdim return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A, 8448309124Sdim (__v4sf) __B, 8449309124Sdim (__v4sf) __C, 8450309124Sdim (__mmask8) __U, 8451309124Sdim _MM_FROUND_CUR_DIRECTION); 8452309124Sdim} 8453309124Sdim 8454309124Sdim#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\ 8455309124Sdim (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \ 8456309124Sdim (__v4sf)(__m128)(B), \ 8457309124Sdim (__v4sf)(__m128)(C), (__mmask8)(U), \ 8458309124Sdim (int)(R)); }) 8459309124Sdim 8460309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8461309124Sdim_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 8462309124Sdim{ 8463309124Sdim return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W, 8464309124Sdim (__v4sf) __X, 8465309124Sdim (__v4sf) __Y, 8466309124Sdim (__mmask8) __U, 8467309124Sdim _MM_FROUND_CUR_DIRECTION); 8468309124Sdim} 8469309124Sdim 8470309124Sdim#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\ 8471309124Sdim 
(__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \ 8472309124Sdim (__v4sf)(__m128)(X), \ 8473309124Sdim (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8474309124Sdim (int)(R)); }) 8475309124Sdim 8476309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8477309124Sdim_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8478309124Sdim{ 8479314564Sdim return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 8480314564Sdim -(__v4sf) __A, 8481309124Sdim -(__v4sf) __B, 8482309124Sdim (__mmask8) __U, 8483309124Sdim _MM_FROUND_CUR_DIRECTION); 8484309124Sdim} 8485309124Sdim 8486309124Sdim#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\ 8487314564Sdim (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 8488314564Sdim -(__v4sf)(__m128)(A), \ 8489314564Sdim -(__v4sf)(__m128)(B), (__mmask8)(U), \ 8490309124Sdim (int)(R)); }) 8491309124Sdim 8492309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8493309124Sdim_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 8494309124Sdim{ 8495309124Sdim return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A, 8496309124Sdim (__v4sf) __B, 8497309124Sdim -(__v4sf) __C, 8498309124Sdim (__mmask8) __U, 8499309124Sdim _MM_FROUND_CUR_DIRECTION); 8500309124Sdim} 8501309124Sdim 8502309124Sdim#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\ 8503309124Sdim (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \ 8504309124Sdim (__v4sf)(__m128)(B), \ 8505309124Sdim -(__v4sf)(__m128)(C), (__mmask8)(U), \ 8506309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8507309124Sdim 8508309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 8509309124Sdim_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 8510309124Sdim{ 8511314564Sdim return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W, 8512309124Sdim (__v4sf) __X, 8513314564Sdim (__v4sf) __Y, 8514309124Sdim (__mmask8) __U, 8515309124Sdim _MM_FROUND_CUR_DIRECTION); 8516309124Sdim} 8517309124Sdim 
8518309124Sdim#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\ 8519314564Sdim (__m128)__builtin_ia32_vfnmsubss3_mask3((__v4sf)(__m128)(W), \ 8520309124Sdim (__v4sf)(__m128)(X), \ 8521314564Sdim (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8522309124Sdim (int)(R)); }) 8523309124Sdim 8524309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8525309124Sdim_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8526309124Sdim{ 8527314564Sdim return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W, 8528314564Sdim (__v2df) __A, 8529309124Sdim (__v2df) __B, 8530309124Sdim (__mmask8) __U, 8531309124Sdim _MM_FROUND_CUR_DIRECTION); 8532309124Sdim} 8533309124Sdim 8534309124Sdim#define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\ 8535314564Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8536314564Sdim (__v2df)(__m128d)(A), \ 8537314564Sdim (__v2df)(__m128d)(B), (__mmask8)(U), \ 8538309124Sdim (int)(R)); }) 8539309124Sdim 8540309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8541309124Sdim_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8542309124Sdim{ 8543309124Sdim return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A, 8544309124Sdim (__v2df) __B, 8545309124Sdim (__v2df) __C, 8546309124Sdim (__mmask8) __U, 8547309124Sdim _MM_FROUND_CUR_DIRECTION); 8548309124Sdim} 8549309124Sdim 8550309124Sdim#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\ 8551309124Sdim (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8552309124Sdim (__v2df)(__m128d)(B), \ 8553309124Sdim (__v2df)(__m128d)(C), (__mmask8)(U), \ 8554309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8555309124Sdim 8556309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8557309124Sdim_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8558309124Sdim{ 8559309124Sdim return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, 8560309124Sdim (__v2df) __X, 8561309124Sdim (__v2df) __Y, 
8562309124Sdim (__mmask8) __U, 8563309124Sdim _MM_FROUND_CUR_DIRECTION); 8564309124Sdim} 8565309124Sdim 8566309124Sdim#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\ 8567309124Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ 8568309124Sdim (__v2df)(__m128d)(X), \ 8569309124Sdim (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8570309124Sdim (int)(R)); }) 8571309124Sdim 8572309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8573309124Sdim_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8574309124Sdim{ 8575314564Sdim return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W, 8576314564Sdim (__v2df) __A, 8577309124Sdim -(__v2df) __B, 8578309124Sdim (__mmask8) __U, 8579309124Sdim _MM_FROUND_CUR_DIRECTION); 8580309124Sdim} 8581309124Sdim 8582309124Sdim#define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\ 8583314564Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8584314564Sdim (__v2df)(__m128d)(A), \ 8585314564Sdim -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8586309124Sdim (int)(R)); }) 8587309124Sdim 8588309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8589309124Sdim_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8590309124Sdim{ 8591309124Sdim return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A, 8592309124Sdim (__v2df) __B, 8593309124Sdim -(__v2df) __C, 8594309124Sdim (__mmask8) __U, 8595309124Sdim _MM_FROUND_CUR_DIRECTION); 8596309124Sdim} 8597309124Sdim 8598309124Sdim#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\ 8599309124Sdim (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8600309124Sdim (__v2df)(__m128d)(B), \ 8601309124Sdim -(__v2df)(__m128d)(C), \ 8602309124Sdim (__mmask8)(U), (int)(R)); }) 8603309124Sdim 8604309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8605309124Sdim_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8606309124Sdim{ 8607314564Sdim return (__m128d) 
__builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W, 8608309124Sdim (__v2df) __X, 8609314564Sdim (__v2df) __Y, 8610309124Sdim (__mmask8) __U, 8611309124Sdim _MM_FROUND_CUR_DIRECTION); 8612309124Sdim} 8613309124Sdim 8614309124Sdim#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\ 8615314564Sdim (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ 8616309124Sdim (__v2df)(__m128d)(X), \ 8617314564Sdim (__v2df)(__m128d)(Y), \ 8618309124Sdim (__mmask8)(U), (int)(R)); }) 8619309124Sdim 8620309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8621309124Sdim_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8622309124Sdim{ 8623314564Sdim return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W, 8624314564Sdim -(__v2df) __A, 8625309124Sdim (__v2df) __B, 8626309124Sdim (__mmask8) __U, 8627309124Sdim _MM_FROUND_CUR_DIRECTION); 8628309124Sdim} 8629309124Sdim 8630309124Sdim#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\ 8631314564Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8632314564Sdim -(__v2df)(__m128d)(A), \ 8633314564Sdim (__v2df)(__m128d)(B), (__mmask8)(U), \ 8634309124Sdim (int)(R)); }) 8635309124Sdim 8636309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8637309124Sdim_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8638309124Sdim{ 8639309124Sdim return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A, 8640309124Sdim (__v2df) __B, 8641309124Sdim (__v2df) __C, 8642309124Sdim (__mmask8) __U, 8643309124Sdim _MM_FROUND_CUR_DIRECTION); 8644309124Sdim} 8645309124Sdim 8646309124Sdim#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\ 8647309124Sdim (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \ 8648309124Sdim (__v2df)(__m128d)(B), \ 8649309124Sdim (__v2df)(__m128d)(C), (__mmask8)(U), \ 8650309124Sdim (int)(R)); }) 8651309124Sdim 8652309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8653309124Sdim_mm_mask3_fnmadd_sd 
(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8654309124Sdim{ 8655309124Sdim return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W, 8656309124Sdim (__v2df) __X, 8657309124Sdim (__v2df) __Y, 8658309124Sdim (__mmask8) __U, 8659309124Sdim _MM_FROUND_CUR_DIRECTION); 8660309124Sdim} 8661309124Sdim 8662309124Sdim#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\ 8663309124Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \ 8664309124Sdim (__v2df)(__m128d)(X), \ 8665309124Sdim (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8666309124Sdim (int)(R)); }) 8667309124Sdim 8668309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8669309124Sdim_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8670309124Sdim{ 8671314564Sdim return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W, 8672314564Sdim -(__v2df) __A, 8673309124Sdim -(__v2df) __B, 8674309124Sdim (__mmask8) __U, 8675309124Sdim _MM_FROUND_CUR_DIRECTION); 8676309124Sdim} 8677309124Sdim 8678309124Sdim#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\ 8679314564Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8680314564Sdim -(__v2df)(__m128d)(A), \ 8681314564Sdim -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8682309124Sdim (int)(R)); }) 8683309124Sdim 8684309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8685309124Sdim_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8686309124Sdim{ 8687309124Sdim return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A, 8688309124Sdim (__v2df) __B, 8689309124Sdim -(__v2df) __C, 8690309124Sdim (__mmask8) __U, 8691309124Sdim _MM_FROUND_CUR_DIRECTION); 8692309124Sdim} 8693309124Sdim 8694309124Sdim#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\ 8695309124Sdim (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \ 8696309124Sdim (__v2df)(__m128d)(B), \ 8697309124Sdim -(__v2df)(__m128d)(C), \ 8698309124Sdim (__mmask8)(U), \ 
8699309124Sdim _MM_FROUND_CUR_DIRECTION); }) 8700309124Sdim 8701309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 8702309124Sdim_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8703309124Sdim{ 8704314564Sdim return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W), 8705309124Sdim (__v2df) __X, 8706314564Sdim (__v2df) (__Y), 8707309124Sdim (__mmask8) __U, 8708309124Sdim _MM_FROUND_CUR_DIRECTION); 8709309124Sdim} 8710309124Sdim 8711309124Sdim#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\ 8712314564Sdim (__m128d)__builtin_ia32_vfnmsubsd3_mask3((__v2df)(__m128d)(W), \ 8713309124Sdim (__v2df)(__m128d)(X), \ 8714314564Sdim (__v2df)(__m128d)(Y), \ 8715309124Sdim (__mmask8)(U), (int)(R)); }) 8716309124Sdim 8717309124Sdim#define _mm512_permutex_pd(X, C) __extension__ ({ \ 8718309124Sdim (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ 8719309124Sdim (__v8df)_mm512_undefined_pd(), \ 8720309124Sdim 0 + (((C) >> 0) & 0x3), \ 8721309124Sdim 0 + (((C) >> 2) & 0x3), \ 8722309124Sdim 0 + (((C) >> 4) & 0x3), \ 8723309124Sdim 0 + (((C) >> 6) & 0x3), \ 8724309124Sdim 4 + (((C) >> 0) & 0x3), \ 8725309124Sdim 4 + (((C) >> 2) & 0x3), \ 8726309124Sdim 4 + (((C) >> 4) & 0x3), \ 8727309124Sdim 4 + (((C) >> 6) & 0x3)); }) 8728309124Sdim 8729309124Sdim#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \ 8730309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 8731309124Sdim (__v8df)_mm512_permutex_pd((X), (C)), \ 8732309124Sdim (__v8df)(__m512d)(W)); }) 8733309124Sdim 8734309124Sdim#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \ 8735309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 8736309124Sdim (__v8df)_mm512_permutex_pd((X), (C)), \ 8737309124Sdim (__v8df)_mm512_setzero_pd()); }) 8738309124Sdim 8739309124Sdim#define _mm512_permutex_epi64(X, C) __extension__ ({ \ 8740309124Sdim (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \ 8741309124Sdim 
(__v8di)_mm512_undefined_epi32(), \ 8742309124Sdim 0 + (((C) >> 0) & 0x3), \ 8743309124Sdim 0 + (((C) >> 2) & 0x3), \ 8744309124Sdim 0 + (((C) >> 4) & 0x3), \ 8745309124Sdim 0 + (((C) >> 6) & 0x3), \ 8746309124Sdim 4 + (((C) >> 0) & 0x3), \ 8747309124Sdim 4 + (((C) >> 2) & 0x3), \ 8748309124Sdim 4 + (((C) >> 4) & 0x3), \ 8749309124Sdim 4 + (((C) >> 6) & 0x3)); }) 8750309124Sdim 8751309124Sdim#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ 8752309124Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 8753309124Sdim (__v8di)_mm512_permutex_epi64((X), (C)), \ 8754309124Sdim (__v8di)(__m512i)(W)); }) 8755309124Sdim 8756309124Sdim#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \ 8757309124Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 8758309124Sdim (__v8di)_mm512_permutex_epi64((X), (C)), \ 8759309124Sdim (__v8di)_mm512_setzero_si512()); }) 8760309124Sdim 8761309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 8762309124Sdim_mm512_permutexvar_pd (__m512i __X, __m512d __Y) 8763309124Sdim{ 8764309124Sdim return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, 8765309124Sdim (__v8di) __X, 8766309124Sdim (__v8df) _mm512_undefined_pd (), 8767309124Sdim (__mmask8) -1); 8768309124Sdim} 8769309124Sdim 8770309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 8771309124Sdim_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) 8772309124Sdim{ 8773309124Sdim return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, 8774309124Sdim (__v8di) __X, 8775309124Sdim (__v8df) __W, 8776309124Sdim (__mmask8) __U); 8777309124Sdim} 8778309124Sdim 8779309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 8780309124Sdim_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y) 8781309124Sdim{ 8782309124Sdim return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, 8783309124Sdim (__v8di) __X, 8784309124Sdim (__v8df) _mm512_setzero_pd (), 8785309124Sdim (__mmask8) __U); 
8786309124Sdim} 8787309124Sdim 8788309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 8789309124Sdim_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) 8790309124Sdim{ 8791309124Sdim return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, 8792309124Sdim (__v8di) __X, 8793309124Sdim (__v8di) _mm512_setzero_si512 (), 8794309124Sdim __M); 8795309124Sdim} 8796309124Sdim 8797309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 8798309124Sdim_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) 8799309124Sdim{ 8800309124Sdim return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, 8801309124Sdim (__v8di) __X, 8802309124Sdim (__v8di) _mm512_undefined_epi32 (), 8803309124Sdim (__mmask8) -1); 8804309124Sdim} 8805309124Sdim 8806309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 8807309124Sdim_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, 8808309124Sdim __m512i __Y) 8809309124Sdim{ 8810309124Sdim return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, 8811309124Sdim (__v8di) __X, 8812309124Sdim (__v8di) __W, 8813309124Sdim __M); 8814309124Sdim} 8815309124Sdim 8816309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 8817309124Sdim_mm512_permutexvar_ps (__m512i __X, __m512 __Y) 8818309124Sdim{ 8819309124Sdim return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, 8820309124Sdim (__v16si) __X, 8821309124Sdim (__v16sf) _mm512_undefined_ps (), 8822309124Sdim (__mmask16) -1); 8823309124Sdim} 8824309124Sdim 8825309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 8826309124Sdim_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) 8827309124Sdim{ 8828309124Sdim return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, 8829309124Sdim (__v16si) __X, 8830309124Sdim (__v16sf) __W, 8831309124Sdim (__mmask16) __U); 8832309124Sdim} 8833309124Sdim 8834309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 8835309124Sdim_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i 
__X, __m512 __Y) 8836309124Sdim{ 8837309124Sdim return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, 8838309124Sdim (__v16si) __X, 8839309124Sdim (__v16sf) _mm512_setzero_ps (), 8840309124Sdim (__mmask16) __U); 8841309124Sdim} 8842309124Sdim 8843309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 8844309124Sdim_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) 8845309124Sdim{ 8846309124Sdim return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, 8847309124Sdim (__v16si) __X, 8848309124Sdim (__v16si) _mm512_setzero_si512 (), 8849309124Sdim __M); 8850309124Sdim} 8851309124Sdim 8852309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 8853309124Sdim_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) 8854309124Sdim{ 8855309124Sdim return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, 8856309124Sdim (__v16si) __X, 8857309124Sdim (__v16si) _mm512_undefined_epi32 (), 8858309124Sdim (__mmask16) -1); 8859309124Sdim} 8860309124Sdim 8861309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 8862309124Sdim_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, 8863309124Sdim __m512i __Y) 8864309124Sdim{ 8865309124Sdim return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, 8866309124Sdim (__v16si) __X, 8867309124Sdim (__v16si) __W, 8868309124Sdim __M); 8869309124Sdim} 8870309124Sdim 8871309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 8872309124Sdim_mm512_kand (__mmask16 __A, __mmask16 __B) 8873309124Sdim{ 8874309124Sdim return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); 8875309124Sdim} 8876309124Sdim 8877309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 8878309124Sdim_mm512_kandn (__mmask16 __A, __mmask16 __B) 8879309124Sdim{ 8880309124Sdim return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); 8881309124Sdim} 8882309124Sdim 8883309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 8884309124Sdim_mm512_kor (__mmask16 __A, 
__mmask16 __B) 8885309124Sdim{ 8886309124Sdim return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B); 8887309124Sdim} 8888309124Sdim 8889309124Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 8890309124Sdim_mm512_kortestc (__mmask16 __A, __mmask16 __B) 8891309124Sdim{ 8892309124Sdim return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B); 8893309124Sdim} 8894309124Sdim 8895309124Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 8896309124Sdim_mm512_kortestz (__mmask16 __A, __mmask16 __B) 8897309124Sdim{ 8898309124Sdim return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B); 8899309124Sdim} 8900309124Sdim 8901309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 8902309124Sdim_mm512_kunpackb (__mmask16 __A, __mmask16 __B) 8903309124Sdim{ 8904309124Sdim return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); 8905309124Sdim} 8906309124Sdim 8907309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 8908309124Sdim_mm512_kxnor (__mmask16 __A, __mmask16 __B) 8909309124Sdim{ 8910309124Sdim return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); 8911309124Sdim} 8912309124Sdim 8913309124Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 8914309124Sdim_mm512_kxor (__mmask16 __A, __mmask16 __B) 8915309124Sdim{ 8916309124Sdim return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); 8917309124Sdim} 8918309124Sdim 8919309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 8920309124Sdim_mm512_stream_si512 (__m512i * __P, __m512i __A) 8921309124Sdim{ 8922309124Sdim __builtin_nontemporal_store((__v8di)__A, (__v8di*)__P); 8923309124Sdim} 8924309124Sdim 8925309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 8926309124Sdim_mm512_stream_load_si512 (void *__P) 8927309124Sdim{ 8928309124Sdim return __builtin_ia32_movntdqa512 ((__v8di *)__P); 8929309124Sdim} 8930309124Sdim 8931309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 8932309124Sdim_mm512_stream_pd (double *__P, __m512d __A) 
8933309124Sdim{ 8934309124Sdim __builtin_nontemporal_store((__v8df)__A, (__v8df*)__P); 8935309124Sdim} 8936309124Sdim 8937309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 8938309124Sdim_mm512_stream_ps (float *__P, __m512 __A) 8939309124Sdim{ 8940309124Sdim __builtin_nontemporal_store((__v16sf)__A, (__v16sf*)__P); 8941309124Sdim} 8942309124Sdim 8943309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 8944309124Sdim_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) 8945309124Sdim{ 8946309124Sdim return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 8947309124Sdim (__v8df) __W, 8948309124Sdim (__mmask8) __U); 8949309124Sdim} 8950309124Sdim 8951309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 8952309124Sdim_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) 8953309124Sdim{ 8954309124Sdim return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 8955309124Sdim (__v8df) 8956309124Sdim _mm512_setzero_pd (), 8957309124Sdim (__mmask8) __U); 8958309124Sdim} 8959309124Sdim 8960309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 8961309124Sdim_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 8962309124Sdim{ 8963309124Sdim return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 8964309124Sdim (__v8di) __W, 8965309124Sdim (__mmask8) __U); 8966309124Sdim} 8967309124Sdim 8968309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 8969309124Sdim_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) 8970309124Sdim{ 8971309124Sdim return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 8972309124Sdim (__v8di) 8973309124Sdim _mm512_setzero_si512 (), 8974309124Sdim (__mmask8) __U); 8975309124Sdim} 8976309124Sdim 8977309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 8978309124Sdim_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) 8979309124Sdim{ 8980309124Sdim return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 8981309124Sdim (__v16sf) __W, 8982309124Sdim (__mmask16) 
__U); 8983309124Sdim} 8984309124Sdim 8985309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 8986309124Sdim_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) 8987309124Sdim{ 8988309124Sdim return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 8989309124Sdim (__v16sf) 8990309124Sdim _mm512_setzero_ps (), 8991309124Sdim (__mmask16) __U); 8992309124Sdim} 8993309124Sdim 8994309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 8995309124Sdim_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 8996309124Sdim{ 8997309124Sdim return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 8998309124Sdim (__v16si) __W, 8999309124Sdim (__mmask16) __U); 9000309124Sdim} 9001309124Sdim 9002309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9003309124Sdim_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) 9004309124Sdim{ 9005309124Sdim return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 9006309124Sdim (__v16si) 9007309124Sdim _mm512_setzero_si512 (), 9008309124Sdim (__mmask16) __U); 9009309124Sdim} 9010309124Sdim 9011309124Sdim#define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \ 9012309124Sdim (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 9013309124Sdim (__v4sf)(__m128)(Y), (int)(P), \ 9014309124Sdim (__mmask8)-1, (int)(R)); }) 9015309124Sdim 9016309124Sdim#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \ 9017309124Sdim (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 9018309124Sdim (__v4sf)(__m128)(Y), (int)(P), \ 9019309124Sdim (__mmask8)(M), (int)(R)); }) 9020309124Sdim 9021309124Sdim#define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \ 9022309124Sdim (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 9023309124Sdim (__v4sf)(__m128)(Y), (int)(P), \ 9024309124Sdim (__mmask8)-1, \ 9025309124Sdim _MM_FROUND_CUR_DIRECTION); }) 9026309124Sdim 9027309124Sdim#define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \ 9028309124Sdim 
(__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 9029309124Sdim (__v4sf)(__m128)(Y), (int)(P), \ 9030309124Sdim (__mmask8)(M), \ 9031309124Sdim _MM_FROUND_CUR_DIRECTION); }) 9032309124Sdim 9033309124Sdim#define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \ 9034309124Sdim (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 9035309124Sdim (__v2df)(__m128d)(Y), (int)(P), \ 9036309124Sdim (__mmask8)-1, (int)(R)); }) 9037309124Sdim 9038309124Sdim#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \ 9039309124Sdim (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 9040309124Sdim (__v2df)(__m128d)(Y), (int)(P), \ 9041309124Sdim (__mmask8)(M), (int)(R)); }) 9042309124Sdim 9043309124Sdim#define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \ 9044309124Sdim (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 9045309124Sdim (__v2df)(__m128d)(Y), (int)(P), \ 9046309124Sdim (__mmask8)-1, \ 9047309124Sdim _MM_FROUND_CUR_DIRECTION); }) 9048309124Sdim 9049309124Sdim#define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \ 9050309124Sdim (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 9051309124Sdim (__v2df)(__m128d)(Y), (int)(P), \ 9052309124Sdim (__mmask8)(M), \ 9053309124Sdim _MM_FROUND_CUR_DIRECTION); }) 9054309124Sdim 9055309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9056309124Sdim_mm512_movehdup_ps (__m512 __A) 9057309124Sdim{ 9058309124Sdim return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 9059309124Sdim 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); 9060309124Sdim} 9061309124Sdim 9062309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9063309124Sdim_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) 9064309124Sdim{ 9065309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 9066309124Sdim (__v16sf)_mm512_movehdup_ps(__A), 9067309124Sdim (__v16sf)__W); 9068309124Sdim} 9069309124Sdim 9070309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 
9071309124Sdim_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) 9072309124Sdim{ 9073309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 9074309124Sdim (__v16sf)_mm512_movehdup_ps(__A), 9075309124Sdim (__v16sf)_mm512_setzero_ps()); 9076309124Sdim} 9077309124Sdim 9078309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9079309124Sdim_mm512_moveldup_ps (__m512 __A) 9080309124Sdim{ 9081309124Sdim return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 9082309124Sdim 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); 9083309124Sdim} 9084309124Sdim 9085309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9086309124Sdim_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) 9087309124Sdim{ 9088309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 9089309124Sdim (__v16sf)_mm512_moveldup_ps(__A), 9090309124Sdim (__v16sf)__W); 9091309124Sdim} 9092309124Sdim 9093309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9094309124Sdim_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) 9095309124Sdim{ 9096309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 9097309124Sdim (__v16sf)_mm512_moveldup_ps(__A), 9098309124Sdim (__v16sf)_mm512_setzero_ps()); 9099309124Sdim} 9100309124Sdim 9101314564Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 9102314564Sdim_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 9103314564Sdim{ 9104314564Sdim __m128 res = __A; 9105314564Sdim res[0] = (__U & 1) ? __B[0] : __W[0]; 9106314564Sdim return res; 9107314564Sdim} 9108314564Sdim 9109314564Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 9110314564Sdim_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) 9111314564Sdim{ 9112314564Sdim __m128 res = __A; 9113314564Sdim res[0] = (__U & 1) ? 
__B[0] : 0; 9114314564Sdim return res; 9115314564Sdim} 9116314564Sdim 9117314564Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 9118314564Sdim_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 9119314564Sdim{ 9120314564Sdim __m128d res = __A; 9121314564Sdim res[0] = (__U & 1) ? __B[0] : __W[0]; 9122314564Sdim return res; 9123314564Sdim} 9124314564Sdim 9125314564Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 9126314564Sdim_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) 9127314564Sdim{ 9128314564Sdim __m128d res = __A; 9129314564Sdim res[0] = (__U & 1) ? __B[0] : 0; 9130314564Sdim return res; 9131314564Sdim} 9132314564Sdim 9133314564Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 9134314564Sdim_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) 9135314564Sdim{ 9136314564Sdim __builtin_ia32_storess128_mask ((__v16sf *)__W, 9137314564Sdim (__v16sf) _mm512_castps128_ps512(__A), 9138314564Sdim (__mmask16) __U & (__mmask16)1); 9139314564Sdim} 9140314564Sdim 9141314564Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 9142314564Sdim_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A) 9143314564Sdim{ 9144314564Sdim __builtin_ia32_storesd128_mask ((__v8df *)__W, 9145314564Sdim (__v8df) _mm512_castpd128_pd512(__A), 9146314564Sdim (__mmask8) __U & 1); 9147314564Sdim} 9148314564Sdim 9149314564Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 9150314564Sdim_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A) 9151314564Sdim{ 9152314564Sdim __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W, 9153314564Sdim (__v4sf) {0.0, 0.0, 0.0, 0.0}, 9154314564Sdim 0, 4, 4, 4); 9155314564Sdim 9156314564Sdim return (__m128) __builtin_shufflevector( 9157314564Sdim __builtin_ia32_loadss128_mask ((__v16sf *) __A, 9158314564Sdim (__v16sf) _mm512_castps128_ps512(src), 9159314564Sdim (__mmask16) __U & 1), 9160314564Sdim _mm512_undefined_ps(), 0, 1, 2, 3); 9161314564Sdim} 9162314564Sdim 9163314564Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 
9164314564Sdim_mm_maskz_load_ss (__mmask8 __U, const float* __A) 9165314564Sdim{ 9166314564Sdim return (__m128) __builtin_shufflevector( 9167314564Sdim __builtin_ia32_loadss128_mask ((__v16sf *) __A, 9168314564Sdim (__v16sf) _mm512_setzero_ps(), 9169314564Sdim (__mmask16) __U & 1), 9170314564Sdim _mm512_undefined_ps(), 0, 1, 2, 3); 9171314564Sdim} 9172314564Sdim 9173314564Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 9174314564Sdim_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A) 9175314564Sdim{ 9176314564Sdim __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W, 9177314564Sdim (__v2df) {0.0, 0.0}, 0, 2); 9178314564Sdim 9179314564Sdim return (__m128d) __builtin_shufflevector( 9180314564Sdim __builtin_ia32_loadsd128_mask ((__v8df *) __A, 9181314564Sdim (__v8df) _mm512_castpd128_pd512(src), 9182314564Sdim (__mmask8) __U & 1), 9183314564Sdim _mm512_undefined_pd(), 0, 1); 9184314564Sdim} 9185314564Sdim 9186314564Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 9187314564Sdim_mm_maskz_load_sd (__mmask8 __U, const double* __A) 9188314564Sdim{ 9189314564Sdim return (__m128d) __builtin_shufflevector( 9190314564Sdim __builtin_ia32_loadsd128_mask ((__v8df *) __A, 9191314564Sdim (__v8df) _mm512_setzero_pd(), 9192314564Sdim (__mmask8) __U & 1), 9193314564Sdim _mm512_undefined_pd(), 0, 1); 9194314564Sdim} 9195314564Sdim 9196309124Sdim#define _mm512_shuffle_epi32(A, I) __extension__ ({ \ 9197309124Sdim (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ 9198309124Sdim (__v16si)_mm512_undefined_epi32(), \ 9199309124Sdim 0 + (((I) >> 0) & 0x3), \ 9200309124Sdim 0 + (((I) >> 2) & 0x3), \ 9201309124Sdim 0 + (((I) >> 4) & 0x3), \ 9202309124Sdim 0 + (((I) >> 6) & 0x3), \ 9203309124Sdim 4 + (((I) >> 0) & 0x3), \ 9204309124Sdim 4 + (((I) >> 2) & 0x3), \ 9205309124Sdim 4 + (((I) >> 4) & 0x3), \ 9206309124Sdim 4 + (((I) >> 6) & 0x3), \ 9207309124Sdim 8 + (((I) >> 0) & 0x3), \ 9208309124Sdim 8 + (((I) >> 2) & 0x3), \ 9209309124Sdim 8 + (((I) >> 4) & 0x3), \ 
9210309124Sdim 8 + (((I) >> 6) & 0x3), \ 9211309124Sdim 12 + (((I) >> 0) & 0x3), \ 9212309124Sdim 12 + (((I) >> 2) & 0x3), \ 9213309124Sdim 12 + (((I) >> 4) & 0x3), \ 9214309124Sdim 12 + (((I) >> 6) & 0x3)); }) 9215309124Sdim 9216309124Sdim#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \ 9217309124Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 9218309124Sdim (__v16si)_mm512_shuffle_epi32((A), (I)), \ 9219309124Sdim (__v16si)(__m512i)(W)); }) 9220309124Sdim 9221309124Sdim#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \ 9222309124Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 9223309124Sdim (__v16si)_mm512_shuffle_epi32((A), (I)), \ 9224309124Sdim (__v16si)_mm512_setzero_si512()); }) 9225309124Sdim 9226309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9227309124Sdim_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) 9228309124Sdim{ 9229309124Sdim return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, 9230309124Sdim (__v8df) __W, 9231309124Sdim (__mmask8) __U); 9232309124Sdim} 9233309124Sdim 9234309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9235309124Sdim_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) 9236309124Sdim{ 9237309124Sdim return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, 9238309124Sdim (__v8df) _mm512_setzero_pd (), 9239309124Sdim (__mmask8) __U); 9240309124Sdim} 9241309124Sdim 9242309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9243309124Sdim_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 9244309124Sdim{ 9245309124Sdim return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, 9246309124Sdim (__v8di) __W, 9247309124Sdim (__mmask8) __U); 9248309124Sdim} 9249309124Sdim 9250309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9251309124Sdim_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A) 9252309124Sdim{ 9253309124Sdim return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, 9254309124Sdim (__v8di) 
_mm512_setzero_pd (), 9255309124Sdim (__mmask8) __U); 9256309124Sdim} 9257309124Sdim 9258309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9259309124Sdim_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P) 9260309124Sdim{ 9261309124Sdim return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, 9262309124Sdim (__v8df) __W, 9263309124Sdim (__mmask8) __U); 9264309124Sdim} 9265309124Sdim 9266309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9267309124Sdim_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P) 9268309124Sdim{ 9269309124Sdim return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, 9270309124Sdim (__v8df) _mm512_setzero_pd(), 9271309124Sdim (__mmask8) __U); 9272309124Sdim} 9273309124Sdim 9274309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9275309124Sdim_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P) 9276309124Sdim{ 9277309124Sdim return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, 9278309124Sdim (__v8di) __W, 9279309124Sdim (__mmask8) __U); 9280309124Sdim} 9281309124Sdim 9282309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9283309124Sdim_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) 9284309124Sdim{ 9285309124Sdim return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, 9286309124Sdim (__v8di) _mm512_setzero_pd(), 9287309124Sdim (__mmask8) __U); 9288309124Sdim} 9289309124Sdim 9290309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9291309124Sdim_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P) 9292309124Sdim{ 9293309124Sdim return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, 9294309124Sdim (__v16sf) __W, 9295309124Sdim (__mmask16) __U); 9296309124Sdim} 9297309124Sdim 9298309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9299309124Sdim_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P) 9300309124Sdim{ 9301309124Sdim return (__m512) 
__builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, 9302309124Sdim (__v16sf) _mm512_setzero_ps(), 9303309124Sdim (__mmask16) __U); 9304309124Sdim} 9305309124Sdim 9306309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9307309124Sdim_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P) 9308309124Sdim{ 9309309124Sdim return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, 9310309124Sdim (__v16si) __W, 9311309124Sdim (__mmask16) __U); 9312309124Sdim} 9313309124Sdim 9314309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9315309124Sdim_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) 9316309124Sdim{ 9317309124Sdim return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, 9318309124Sdim (__v16si) _mm512_setzero_ps(), 9319309124Sdim (__mmask16) __U); 9320309124Sdim} 9321309124Sdim 9322309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9323309124Sdim_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) 9324309124Sdim{ 9325309124Sdim return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, 9326309124Sdim (__v16sf) __W, 9327309124Sdim (__mmask16) __U); 9328309124Sdim} 9329309124Sdim 9330309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9331309124Sdim_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) 9332309124Sdim{ 9333309124Sdim return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, 9334309124Sdim (__v16sf) _mm512_setzero_ps(), 9335309124Sdim (__mmask16) __U); 9336309124Sdim} 9337309124Sdim 9338309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9339309124Sdim_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 9340309124Sdim{ 9341309124Sdim return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, 9342309124Sdim (__v16si) __W, 9343309124Sdim (__mmask16) __U); 9344309124Sdim} 9345309124Sdim 9346309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9347309124Sdim_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) 9348309124Sdim{ 
9349309124Sdim return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, 9350309124Sdim (__v16si) _mm512_setzero_ps(), 9351309124Sdim (__mmask16) __U); 9352309124Sdim} 9353309124Sdim 9354309124Sdim#define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \ 9355309124Sdim (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 9356309124Sdim (__v8df)_mm512_undefined_pd(), \ 9357309124Sdim (__mmask8)-1, (int)(R)); }) 9358309124Sdim 9359309124Sdim#define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \ 9360309124Sdim (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 9361309124Sdim (__v8df)(__m512d)(W), \ 9362309124Sdim (__mmask8)(U), (int)(R)); }) 9363309124Sdim 9364309124Sdim#define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \ 9365309124Sdim (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 9366309124Sdim (__v8df)_mm512_setzero_pd(), \ 9367309124Sdim (__mmask8)(U), (int)(R)); }) 9368309124Sdim 9369309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9370309124Sdim_mm512_cvtps_pd (__m256 __A) 9371309124Sdim{ 9372309124Sdim return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 9373309124Sdim (__v8df) 9374309124Sdim _mm512_undefined_pd (), 9375309124Sdim (__mmask8) -1, 9376309124Sdim _MM_FROUND_CUR_DIRECTION); 9377309124Sdim} 9378309124Sdim 9379309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9380309124Sdim_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) 9381309124Sdim{ 9382309124Sdim return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 9383309124Sdim (__v8df) __W, 9384309124Sdim (__mmask8) __U, 9385309124Sdim _MM_FROUND_CUR_DIRECTION); 9386309124Sdim} 9387309124Sdim 9388309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9389309124Sdim_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) 9390309124Sdim{ 9391309124Sdim return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 9392309124Sdim (__v8df) 9393309124Sdim _mm512_setzero_pd (), 9394309124Sdim (__mmask8) __U, 
9395309124Sdim _MM_FROUND_CUR_DIRECTION); 9396309124Sdim} 9397309124Sdim 9398314564Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9399314564Sdim_mm512_cvtpslo_pd (__m512 __A) 9400314564Sdim{ 9401314564Sdim return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A)); 9402314564Sdim} 9403314564Sdim 9404314564Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9405314564Sdim_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A) 9406314564Sdim{ 9407314564Sdim return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A)); 9408314564Sdim} 9409314564Sdim 9410309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9411309124Sdim_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) 9412309124Sdim{ 9413309124Sdim return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, 9414309124Sdim (__v8df) __A, 9415309124Sdim (__v8df) __W); 9416309124Sdim} 9417309124Sdim 9418309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9419309124Sdim_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) 9420309124Sdim{ 9421309124Sdim return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, 9422309124Sdim (__v8df) __A, 9423309124Sdim (__v8df) _mm512_setzero_pd ()); 9424309124Sdim} 9425309124Sdim 9426309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9427309124Sdim_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) 9428309124Sdim{ 9429309124Sdim return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, 9430309124Sdim (__v16sf) __A, 9431309124Sdim (__v16sf) __W); 9432309124Sdim} 9433309124Sdim 9434309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9435309124Sdim_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) 9436309124Sdim{ 9437309124Sdim return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, 9438309124Sdim (__v16sf) __A, 9439309124Sdim (__v16sf) _mm512_setzero_ps ()); 9440309124Sdim} 9441309124Sdim 9442309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 9443309124Sdim_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A) 
9444309124Sdim{ 9445309124Sdim __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A, 9446309124Sdim (__mmask8) __U); 9447309124Sdim} 9448309124Sdim 9449309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 9450309124Sdim_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A) 9451309124Sdim{ 9452309124Sdim __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A, 9453309124Sdim (__mmask8) __U); 9454309124Sdim} 9455309124Sdim 9456309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 9457309124Sdim_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A) 9458309124Sdim{ 9459309124Sdim __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A, 9460309124Sdim (__mmask16) __U); 9461309124Sdim} 9462309124Sdim 9463309124Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 9464309124Sdim_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) 9465309124Sdim{ 9466309124Sdim __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A, 9467309124Sdim (__mmask16) __U); 9468309124Sdim} 9469309124Sdim 9470309124Sdim#define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \ 9471309124Sdim (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9472309124Sdim (__v2df)(__m128d)(B), \ 9473309124Sdim (__v4sf)_mm_undefined_ps(), \ 9474309124Sdim (__mmask8)-1, (int)(R)); }) 9475309124Sdim 9476309124Sdim#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \ 9477309124Sdim (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9478309124Sdim (__v2df)(__m128d)(B), \ 9479309124Sdim (__v4sf)(__m128)(W), \ 9480309124Sdim (__mmask8)(U), (int)(R)); }) 9481309124Sdim 9482309124Sdim#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \ 9483309124Sdim (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9484309124Sdim (__v2df)(__m128d)(B), \ 9485309124Sdim (__v4sf)_mm_setzero_ps(), \ 9486309124Sdim (__mmask8)(U), (int)(R)); }) 9487309124Sdim 9488309124Sdimstatic 
__inline__ __m128 __DEFAULT_FN_ATTRS 9489309124Sdim_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) 9490309124Sdim{ 9491309124Sdim return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A), 9492309124Sdim (__v2df)(__B), 9493309124Sdim (__v4sf)(__W), 9494309124Sdim (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); 9495309124Sdim} 9496309124Sdim 9497309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 9498309124Sdim_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B) 9499309124Sdim{ 9500309124Sdim return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A), 9501309124Sdim (__v2df)(__B), 9502309124Sdim (__v4sf)_mm_setzero_ps(), 9503309124Sdim (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); 9504309124Sdim} 9505309124Sdim 9506309124Sdim#define _mm_cvtss_i32 _mm_cvtss_si32 9507314564Sdim#define _mm_cvtsd_i32 _mm_cvtsd_si32 9508314564Sdim#define _mm_cvti32_sd _mm_cvtsi32_sd 9509314564Sdim#define _mm_cvti32_ss _mm_cvtsi32_ss 9510314564Sdim#ifdef __x86_64__ 9511309124Sdim#define _mm_cvtss_i64 _mm_cvtss_si64 9512309124Sdim#define _mm_cvtsd_i64 _mm_cvtsd_si64 9513309124Sdim#define _mm_cvti64_sd _mm_cvtsi64_sd 9514309124Sdim#define _mm_cvti64_ss _mm_cvtsi64_ss 9515314564Sdim#endif 9516309124Sdim 9517314564Sdim#ifdef __x86_64__ 9518309124Sdim#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \ 9519309124Sdim (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 9520309124Sdim (int)(R)); }) 9521309124Sdim 9522309124Sdim#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \ 9523309124Sdim (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 9524309124Sdim (int)(R)); }) 9525314564Sdim#endif 9526309124Sdim 9527309124Sdim#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \ 9528309124Sdim (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); }) 9529309124Sdim 9530309124Sdim#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \ 9531309124Sdim (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), 
(int)(B), (int)(R)); }) 9532309124Sdim 9533314564Sdim#ifdef __x86_64__ 9534309124Sdim#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \ 9535309124Sdim (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9536309124Sdim (int)(R)); }) 9537309124Sdim 9538309124Sdim#define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \ 9539309124Sdim (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9540309124Sdim (int)(R)); }) 9541314564Sdim#endif 9542309124Sdim 9543309124Sdim#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \ 9544309124Sdim (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9545309124Sdim (__v4sf)(__m128)(B), \ 9546309124Sdim (__v2df)_mm_undefined_pd(), \ 9547309124Sdim (__mmask8)-1, (int)(R)); }) 9548309124Sdim 9549309124Sdim#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \ 9550309124Sdim (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9551309124Sdim (__v4sf)(__m128)(B), \ 9552309124Sdim (__v2df)(__m128d)(W), \ 9553309124Sdim (__mmask8)(U), (int)(R)); }) 9554309124Sdim 9555309124Sdim#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \ 9556309124Sdim (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9557309124Sdim (__v4sf)(__m128)(B), \ 9558309124Sdim (__v2df)_mm_setzero_pd(), \ 9559309124Sdim (__mmask8)(U), (int)(R)); }) 9560309124Sdim 9561309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 9562309124Sdim_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B) 9563309124Sdim{ 9564309124Sdim return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A), 9565309124Sdim (__v4sf)(__B), 9566309124Sdim (__v2df)(__W), 9567309124Sdim (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); 9568309124Sdim} 9569309124Sdim 9570309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 9571309124Sdim_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B) 9572309124Sdim{ 9573309124Sdim return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A), 
9574309124Sdim (__v4sf)(__B), 9575309124Sdim (__v2df)_mm_setzero_pd(), 9576309124Sdim (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); 9577309124Sdim} 9578309124Sdim 9579309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 9580309124Sdim_mm_cvtu32_sd (__m128d __A, unsigned __B) 9581309124Sdim{ 9582309124Sdim return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B); 9583309124Sdim} 9584309124Sdim 9585314564Sdim#ifdef __x86_64__ 9586309124Sdim#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \ 9587309124Sdim (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \ 9588309124Sdim (unsigned long long)(B), (int)(R)); }) 9589309124Sdim 9590309124Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 9591309124Sdim_mm_cvtu64_sd (__m128d __A, unsigned long long __B) 9592309124Sdim{ 9593309124Sdim return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, 9594309124Sdim _MM_FROUND_CUR_DIRECTION); 9595309124Sdim} 9596314564Sdim#endif 9597309124Sdim 9598309124Sdim#define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \ 9599309124Sdim (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \ 9600309124Sdim (int)(R)); }) 9601309124Sdim 9602309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 9603309124Sdim_mm_cvtu32_ss (__m128 __A, unsigned __B) 9604309124Sdim{ 9605309124Sdim return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, 9606309124Sdim _MM_FROUND_CUR_DIRECTION); 9607309124Sdim} 9608309124Sdim 9609314564Sdim#ifdef __x86_64__ 9610309124Sdim#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \ 9611309124Sdim (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \ 9612309124Sdim (unsigned long long)(B), (int)(R)); }) 9613309124Sdim 9614309124Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 9615309124Sdim_mm_cvtu64_ss (__m128 __A, unsigned long long __B) 9616309124Sdim{ 9617309124Sdim return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, 9618309124Sdim _MM_FROUND_CUR_DIRECTION); 9619309124Sdim} 9620314564Sdim#endif 
9621309124Sdim 9622309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9623309124Sdim_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) 9624309124Sdim{ 9625309124Sdim return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O, 9626309124Sdim __M); 9627309124Sdim} 9628309124Sdim 9629314564Sdim#ifdef __x86_64__ 9630309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9631309124Sdim_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) 9632309124Sdim{ 9633309124Sdim return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O, 9634309124Sdim __M); 9635309124Sdim} 9636314564Sdim#endif 9637309124Sdim 9638309124Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS 9639309124Sdim_mm512_set_epi32 (int __A, int __B, int __C, int __D, 9640309124Sdim int __E, int __F, int __G, int __H, 9641309124Sdim int __I, int __J, int __K, int __L, 9642309124Sdim int __M, int __N, int __O, int __P) 9643309124Sdim{ 9644309124Sdim return __extension__ (__m512i)(__v16si) 9645309124Sdim { __P, __O, __N, __M, __L, __K, __J, __I, 9646309124Sdim __H, __G, __F, __E, __D, __C, __B, __A }; 9647309124Sdim} 9648309124Sdim 9649309124Sdim#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \ 9650309124Sdim e8,e9,e10,e11,e12,e13,e14,e15) \ 9651309124Sdim _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \ 9652309124Sdim (e5),(e4),(e3),(e2),(e1),(e0)) 9653309124Sdim 9654309124Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 9655309124Sdim_mm512_set_epi64 (long long __A, long long __B, long long __C, 9656309124Sdim long long __D, long long __E, long long __F, 9657309124Sdim long long __G, long long __H) 9658309124Sdim{ 9659309124Sdim return __extension__ (__m512i) (__v8di) 9660309124Sdim { __H, __G, __F, __E, __D, __C, __B, __A }; 9661309124Sdim} 9662309124Sdim 9663309124Sdim#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ 9664309124Sdim _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) 9665309124Sdim 9666309124Sdimstatic 
__inline__ __m512d __DEFAULT_FN_ATTRS 9667309124Sdim_mm512_set_pd (double __A, double __B, double __C, double __D, 9668309124Sdim double __E, double __F, double __G, double __H) 9669309124Sdim{ 9670309124Sdim return __extension__ (__m512d) 9671309124Sdim { __H, __G, __F, __E, __D, __C, __B, __A }; 9672309124Sdim} 9673309124Sdim 9674309124Sdim#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ 9675309124Sdim _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) 9676309124Sdim 9677309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9678309124Sdim_mm512_set_ps (float __A, float __B, float __C, float __D, 9679309124Sdim float __E, float __F, float __G, float __H, 9680309124Sdim float __I, float __J, float __K, float __L, 9681309124Sdim float __M, float __N, float __O, float __P) 9682309124Sdim{ 9683309124Sdim return __extension__ (__m512) 9684309124Sdim { __P, __O, __N, __M, __L, __K, __J, __I, 9685309124Sdim __H, __G, __F, __E, __D, __C, __B, __A }; 9686309124Sdim} 9687309124Sdim 9688309124Sdim#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \ 9689309124Sdim _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \ 9690309124Sdim (e4),(e3),(e2),(e1),(e0)) 9691309124Sdim 9692309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9693314564Sdim_mm512_abs_ps(__m512 __A) 9694309124Sdim{ 9695314564Sdim return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; 9696309124Sdim} 9697309124Sdim 9698309124Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS 9699314564Sdim_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) 9700309124Sdim{ 9701314564Sdim return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; 9702309124Sdim} 9703309124Sdim 9704309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9705314564Sdim_mm512_abs_pd(__m512d __A) 9706309124Sdim{ 9707314564Sdim return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ; 
9708309124Sdim} 9709309124Sdim 9710309124Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS 9711314564Sdim_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) 9712309124Sdim{ 9713314564Sdim return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A); 9714309124Sdim} 9715309124Sdim 9716314564Sdim// Vector-reduction arithmetic accepts vectors as inputs and produces scalars as 9717314564Sdim// outputs. This class of vector operation forms the basis of many scientific 9718314564Sdim// computations. In vector-reduction arithmetic, the evaluation off is 9719314564Sdim// independent of the order of the input elements of V. 9720314564Sdim 9721314564Sdim// Used bisection method. At each step, we partition the vector with previous 9722314564Sdim// step in half, and the operation is performed on its two halves. 9723314564Sdim// This takes log2(n) steps where n is the number of elements in the vector. 9724314564Sdim 9725314564Sdim// Vec512 - Vector with size 512. 9726314564Sdim// Operator - Can be one of following: +,*,&,| 9727314564Sdim// T2 - Can get 'i' for int and 'f' for float. 9728314564Sdim// T1 - Can get 'i' for int and 'd' for double. 
9729314564Sdim 9730314564Sdim#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1) \ 9731314564Sdim __extension__({ \ 9732314564Sdim __m256##T1 Vec256 = __builtin_shufflevector( \ 9733314564Sdim (__v8d##T2)Vec512, \ 9734314564Sdim (__v8d##T2)Vec512, \ 9735314564Sdim 0, 1, 2, 3) \ 9736314564Sdim Operator \ 9737314564Sdim __builtin_shufflevector( \ 9738314564Sdim (__v8d##T2)Vec512, \ 9739314564Sdim (__v8d##T2)Vec512, \ 9740314564Sdim 4, 5, 6, 7); \ 9741314564Sdim __m128##T1 Vec128 = __builtin_shufflevector( \ 9742314564Sdim (__v4d##T2)Vec256, \ 9743314564Sdim (__v4d##T2)Vec256, \ 9744314564Sdim 0, 1) \ 9745314564Sdim Operator \ 9746314564Sdim __builtin_shufflevector( \ 9747314564Sdim (__v4d##T2)Vec256, \ 9748314564Sdim (__v4d##T2)Vec256, \ 9749314564Sdim 2, 3); \ 9750314564Sdim Vec128 = __builtin_shufflevector((__v2d##T2)Vec128, \ 9751314564Sdim (__v2d##T2)Vec128, 0, -1) \ 9752314564Sdim Operator \ 9753314564Sdim __builtin_shufflevector((__v2d##T2)Vec128, \ 9754314564Sdim (__v2d##T2)Vec128, 1, -1); \ 9755314564Sdim return Vec128[0]; \ 9756314564Sdim }) 9757314564Sdim 9758314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) { 9759314564Sdim _mm512_reduce_operator_64bit(__W, +, i, i); 9760314564Sdim} 9761314564Sdim 9762314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) { 9763314564Sdim _mm512_reduce_operator_64bit(__W, *, i, i); 9764314564Sdim} 9765314564Sdim 9766314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) { 9767314564Sdim _mm512_reduce_operator_64bit(__W, &, i, i); 9768314564Sdim} 9769314564Sdim 9770314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) { 9771314564Sdim _mm512_reduce_operator_64bit(__W, |, i, i); 9772314564Sdim} 9773314564Sdim 9774314564Sdimstatic __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) { 9775314564Sdim _mm512_reduce_operator_64bit(__W, +, f, 
d); 9776314564Sdim} 9777314564Sdim 9778314564Sdimstatic __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) { 9779314564Sdim _mm512_reduce_operator_64bit(__W, *, f, d); 9780314564Sdim} 9781314564Sdim 9782314564Sdim// Vec512 - Vector with size 512. 9783314564Sdim// Vec512Neutral - All vector elements set to the identity element. 9784314564Sdim// Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0} 9785314564Sdim// Operator - Can be one of following: +,*,&,| 9786314564Sdim// Mask - Intrinsic Mask 9787314564Sdim// T2 - Can get 'i' for int and 'f' for float. 9788314564Sdim// T1 - Can get 'i' for int and 'd' for packed double-precision. 9789314564Sdim// T3 - Can be Pd for packed double or q for q-word. 9790314564Sdim 9791314564Sdim#define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator, \ 9792314564Sdim Mask, T2, T1, T3) \ 9793314564Sdim __extension__({ \ 9794314564Sdim Vec512 = __builtin_ia32_select##T3##_512( \ 9795314564Sdim (__mmask8)Mask, \ 9796314564Sdim (__v8d##T2)Vec512, \ 9797314564Sdim (__v8d##T2)Vec512Neutral); \ 9798314564Sdim _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1); \ 9799314564Sdim }) 9800314564Sdim 9801314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 9802314564Sdim_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) { 9803314564Sdim _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q); 9804314564Sdim} 9805314564Sdim 9806314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 9807314564Sdim_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) { 9808314564Sdim _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q); 9809314564Sdim} 9810314564Sdim 9811314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 9812314564Sdim_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) { 9813314564Sdim _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF), 9814314564Sdim &, __M, i, i, q); 9815314564Sdim} 9816314564Sdim 
9817314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 9818314564Sdim_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) { 9819314564Sdim _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M, 9820314564Sdim i, i, q); 9821314564Sdim} 9822314564Sdim 9823314564Sdimstatic __inline__ double __DEFAULT_FN_ATTRS 9824314564Sdim_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) { 9825314564Sdim _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M, 9826314564Sdim f, d, pd); 9827314564Sdim} 9828314564Sdim 9829314564Sdimstatic __inline__ double __DEFAULT_FN_ATTRS 9830314564Sdim_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) { 9831314564Sdim _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M, 9832314564Sdim f, d, pd); 9833314564Sdim} 9834314564Sdim 9835314564Sdim// Vec512 - Vector with size 512. 9836314564Sdim// Operator - Can be one of following: +,*,&,| 9837314564Sdim// T2 - Can get 'i' for int and ' ' for packed single. 9838314564Sdim// T1 - Can get 'i' for int and 'f' for float. 
9839314564Sdim 9840314564Sdim#define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \ 9841314564Sdim __m256##T1 Vec256 = \ 9842314564Sdim (__m256##T1)(__builtin_shufflevector( \ 9843314564Sdim (__v16s##T2)Vec512, \ 9844314564Sdim (__v16s##T2)Vec512, \ 9845314564Sdim 0, 1, 2, 3, 4, 5, 6, 7) \ 9846314564Sdim Operator \ 9847314564Sdim __builtin_shufflevector( \ 9848314564Sdim (__v16s##T2)Vec512, \ 9849314564Sdim (__v16s##T2)Vec512, \ 9850314564Sdim 8, 9, 10, 11, 12, 13, 14, 15)); \ 9851314564Sdim __m128##T1 Vec128 = \ 9852314564Sdim (__m128##T1)(__builtin_shufflevector( \ 9853314564Sdim (__v8s##T2)Vec256, \ 9854314564Sdim (__v8s##T2)Vec256, \ 9855314564Sdim 0, 1, 2, 3) \ 9856314564Sdim Operator \ 9857314564Sdim __builtin_shufflevector( \ 9858314564Sdim (__v8s##T2)Vec256, \ 9859314564Sdim (__v8s##T2)Vec256, \ 9860314564Sdim 4, 5, 6, 7)); \ 9861314564Sdim Vec128 = (__m128##T1)(__builtin_shufflevector( \ 9862314564Sdim (__v4s##T2)Vec128, \ 9863314564Sdim (__v4s##T2)Vec128, \ 9864314564Sdim 0, 1, -1, -1) \ 9865314564Sdim Operator \ 9866314564Sdim __builtin_shufflevector( \ 9867314564Sdim (__v4s##T2)Vec128, \ 9868314564Sdim (__v4s##T2)Vec128, \ 9869314564Sdim 2, 3, -1, -1)); \ 9870314564Sdim Vec128 = (__m128##T1)(__builtin_shufflevector( \ 9871314564Sdim (__v4s##T2)Vec128, \ 9872314564Sdim (__v4s##T2)Vec128, \ 9873314564Sdim 0, -1, -1, -1) \ 9874314564Sdim Operator \ 9875314564Sdim __builtin_shufflevector( \ 9876314564Sdim (__v4s##T2)Vec128, \ 9877314564Sdim (__v4s##T2)Vec128, \ 9878314564Sdim 1, -1, -1, -1)); \ 9879314564Sdim return Vec128[0]; \ 9880314564Sdim }) 9881314564Sdim 9882314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 9883314564Sdim_mm512_reduce_add_epi32(__m512i __W) { 9884314564Sdim _mm512_reduce_operator_32bit(__W, +, i, i); 9885314564Sdim} 9886314564Sdim 9887314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 9888314564Sdim_mm512_reduce_mul_epi32(__m512i __W) { 9889314564Sdim _mm512_reduce_operator_32bit(__W, *, i, i); 
9890314564Sdim} 9891314564Sdim 9892314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 9893314564Sdim_mm512_reduce_and_epi32(__m512i __W) { 9894314564Sdim _mm512_reduce_operator_32bit(__W, &, i, i); 9895314564Sdim} 9896314564Sdim 9897314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 9898314564Sdim_mm512_reduce_or_epi32(__m512i __W) { 9899314564Sdim _mm512_reduce_operator_32bit(__W, |, i, i); 9900314564Sdim} 9901314564Sdim 9902314564Sdimstatic __inline__ float __DEFAULT_FN_ATTRS 9903314564Sdim_mm512_reduce_add_ps(__m512 __W) { 9904314564Sdim _mm512_reduce_operator_32bit(__W, +, f, ); 9905314564Sdim} 9906314564Sdim 9907314564Sdimstatic __inline__ float __DEFAULT_FN_ATTRS 9908314564Sdim_mm512_reduce_mul_ps(__m512 __W) { 9909314564Sdim _mm512_reduce_operator_32bit(__W, *, f, ); 9910314564Sdim} 9911314564Sdim 9912314564Sdim// Vec512 - Vector with size 512. 9913314564Sdim// Vec512Neutral - All vector elements set to the identity element. 9914314564Sdim// Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0} 9915314564Sdim// Operator - Can be one of following: +,*,&,| 9916314564Sdim// Mask - Intrinsic Mask 9917314564Sdim// T2 - Can get 'i' for int and 'f' for float. 9918314564Sdim// T1 - Can get 'i' for int and 'd' for double. 9919314564Sdim// T3 - Can be Ps for packed single or d for d-word. 
9920314564Sdim 9921314564Sdim#define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator, \ 9922314564Sdim Mask, T2, T1, T3) \ 9923314564Sdim __extension__({ \ 9924314564Sdim Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \ 9925314564Sdim (__mmask16)Mask, \ 9926314564Sdim (__v16s##T2)Vec512, \ 9927314564Sdim (__v16s##T2)Vec512Neutral); \ 9928314564Sdim _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1); \ 9929314564Sdim }) 9930314564Sdim 9931314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 9932314564Sdim_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) { 9933314564Sdim _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), +, __M, i, i, d); 9934314564Sdim} 9935314564Sdim 9936314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 9937314564Sdim_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) { 9938314564Sdim _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(1), *, __M, i, i, d); 9939314564Sdim} 9940314564Sdim 9941314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 9942314564Sdim_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) { 9943314564Sdim _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M, 9944314564Sdim i, i, d); 9945314564Sdim} 9946314564Sdim 9947314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 9948314564Sdim_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) { 9949314564Sdim _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), |, __M, i, i, d); 9950314564Sdim} 9951314564Sdim 9952314564Sdimstatic __inline__ float __DEFAULT_FN_ATTRS 9953314564Sdim_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) { 9954314564Sdim _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(0), +, __M, f, , ps); 9955314564Sdim} 9956314564Sdim 9957314564Sdimstatic __inline__ float __DEFAULT_FN_ATTRS 9958314564Sdim_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) { 9959314564Sdim _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps); 9960314564Sdim} 
9961314564Sdim 9962314564Sdim// Used bisection method. At each step, we partition the vector with previous 9963314564Sdim// step in half, and the operation is performed on its two halves. 9964314564Sdim// This takes log2(n) steps where n is the number of elements in the vector. 9965314564Sdim// This macro uses only intrinsics from the AVX512F feature. 9966314564Sdim 9967314564Sdim// Vec512 - Vector with size of 512. 9968314564Sdim// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example: 9969314564Sdim// __mm512_max_epi64 9970314564Sdim// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}] 9971314564Sdim// T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}] 9972314564Sdim 9973314564Sdim#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \ 9974314564Sdim Vec512 = _mm512_##IntrinName( \ 9975314564Sdim (__m512##T1)__builtin_shufflevector( \ 9976314564Sdim (__v8d##T2)Vec512, \ 9977314564Sdim (__v8d##T2)Vec512, \ 9978314564Sdim 0, 1, 2, 3, -1, -1, -1, -1), \ 9979314564Sdim (__m512##T1)__builtin_shufflevector( \ 9980314564Sdim (__v8d##T2)Vec512, \ 9981314564Sdim (__v8d##T2)Vec512, \ 9982314564Sdim 4, 5, 6, 7, -1, -1, -1, -1)); \ 9983314564Sdim Vec512 = _mm512_##IntrinName( \ 9984314564Sdim (__m512##T1)__builtin_shufflevector( \ 9985314564Sdim (__v8d##T2)Vec512, \ 9986314564Sdim (__v8d##T2)Vec512, \ 9987314564Sdim 0, 1, -1, -1, -1, -1, -1, -1),\ 9988314564Sdim (__m512##T1)__builtin_shufflevector( \ 9989314564Sdim (__v8d##T2)Vec512, \ 9990314564Sdim (__v8d##T2)Vec512, \ 9991314564Sdim 2, 3, -1, -1, -1, -1, -1, \ 9992314564Sdim -1)); \ 9993314564Sdim Vec512 = _mm512_##IntrinName( \ 9994314564Sdim (__m512##T1)__builtin_shufflevector( \ 9995314564Sdim (__v8d##T2)Vec512, \ 9996314564Sdim (__v8d##T2)Vec512, \ 9997314564Sdim 0, -1, -1, -1, -1, -1, -1, -1),\ 9998314564Sdim (__m512##T1)__builtin_shufflevector( \ 9999314564Sdim (__v8d##T2)Vec512, \ 10000314564Sdim (__v8d##T2)Vec512, \ 10001314564Sdim 1, -1, -1, -1, -1, -1, 
-1, -1))\ 10002314564Sdim ; \ 10003314564Sdim return Vec512[0]; \ 10004314564Sdim }) 10005314564Sdim 10006314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 10007314564Sdim_mm512_reduce_max_epi64(__m512i __V) { 10008314564Sdim _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i); 10009314564Sdim} 10010314564Sdim 10011314564Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 10012314564Sdim_mm512_reduce_max_epu64(__m512i __V) { 10013314564Sdim _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i); 10014314564Sdim} 10015314564Sdim 10016314564Sdimstatic __inline__ double __DEFAULT_FN_ATTRS 10017314564Sdim_mm512_reduce_max_pd(__m512d __V) { 10018314564Sdim _mm512_reduce_maxMin_64bit(__V, max_pd, d, f); 10019314564Sdim} 10020314564Sdim 10021314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64 10022314564Sdim(__m512i __V) { 10023314564Sdim _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i); 10024314564Sdim} 10025314564Sdim 10026314564Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 10027314564Sdim_mm512_reduce_min_epu64(__m512i __V) { 10028314564Sdim _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i); 10029314564Sdim} 10030314564Sdim 10031314564Sdimstatic __inline__ double __DEFAULT_FN_ATTRS 10032314564Sdim_mm512_reduce_min_pd(__m512d __V) { 10033314564Sdim _mm512_reduce_maxMin_64bit(__V, min_pd, d, f); 10034314564Sdim} 10035314564Sdim 10036314564Sdim// Vec512 - Vector with size 512. 
10037314564Sdim// Vec512Neutral - A 512 length vector with elements set to the identity element 10038314564Sdim// Identity element: {max_epi,0x8000000000000000} 10039314564Sdim// {max_epu,0x0000000000000000} 10040314564Sdim// {max_pd, 0xFFF0000000000000} 10041314564Sdim// {min_epi,0x7FFFFFFFFFFFFFFF} 10042314564Sdim// {min_epu,0xFFFFFFFFFFFFFFFF} 10043314564Sdim// {min_pd, 0x7FF0000000000000} 10044314564Sdim// 10045314564Sdim// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example: 10046314564Sdim// __mm512_max_epi64 10047314564Sdim// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}] 10048314564Sdim// T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}] 10049314564Sdim// T3 - Can get 'q' q word and 'pd' for packed double. 10050314564Sdim// [__builtin_ia32_select{q|pd}_512] 10051314564Sdim// Mask - Intrinsic Mask 10052314564Sdim 10053314564Sdim#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \ 10054314564Sdim T2, T3, Mask) \ 10055314564Sdim __extension__({ \ 10056314564Sdim Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \ 10057314564Sdim (__mmask8)Mask, \ 10058314564Sdim (__v8d##T2)Vec512, \ 10059314564Sdim (__v8d##T2)Vec512Neutral); \ 10060314564Sdim _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2); \ 10061314564Sdim }) 10062314564Sdim 10063314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 10064314564Sdim_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) { 10065314564Sdim _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000), 10066314564Sdim max_epi64, i, i, q, __M); 10067314564Sdim} 10068314564Sdim 10069314564Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 10070314564Sdim_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) { 10071314564Sdim _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000), 10072314564Sdim max_epu64, i, i, q, __M); 10073314564Sdim} 10074314564Sdim 10075314564Sdimstatic __inline__ double 
__DEFAULT_FN_ATTRS 10076314564Sdim_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) { 10077314564Sdim _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()), 10078314564Sdim max_pd, d, f, pd, __M); 10079314564Sdim} 10080314564Sdim 10081314564Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 10082314564Sdim_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) { 10083314564Sdim _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF), 10084314564Sdim min_epi64, i, i, q, __M); 10085314564Sdim} 10086314564Sdim 10087314564Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 10088314564Sdim_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) { 10089314564Sdim _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF), 10090314564Sdim min_epu64, i, i, q, __M); 10091314564Sdim} 10092314564Sdim 10093314564Sdimstatic __inline__ double __DEFAULT_FN_ATTRS 10094314564Sdim_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) { 10095314564Sdim _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()), 10096314564Sdim min_pd, d, f, pd, __M); 10097314564Sdim} 10098314564Sdim 10099314564Sdim// Vec512 - Vector with size 512. 
10100314564Sdim// IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example: 10101314564Sdim// __mm512_max_epi32 10102314564Sdim// T1 - Can get 'i' for int and ' ' .[__m512{i|}] 10103314564Sdim// T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}] 10104314564Sdim 10105314564Sdim#define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) __extension__({ \ 10106314564Sdim Vec512 = _mm512_##IntrinName( \ 10107314564Sdim (__m512##T1)__builtin_shufflevector( \ 10108314564Sdim (__v16s##T2)Vec512, \ 10109314564Sdim (__v16s##T2)Vec512, \ 10110314564Sdim 0, 1, 2, 3, 4, 5, 6, 7, \ 10111314564Sdim -1, -1, -1, -1, -1, -1, -1, -1), \ 10112314564Sdim (__m512##T1)__builtin_shufflevector( \ 10113314564Sdim (__v16s##T2)Vec512, \ 10114314564Sdim (__v16s##T2)Vec512, \ 10115314564Sdim 8, 9, 10, 11, 12, 13, 14, 15, \ 10116314564Sdim -1, -1, -1, -1, -1, -1, -1, -1)); \ 10117314564Sdim Vec512 = _mm512_##IntrinName( \ 10118314564Sdim (__m512##T1)__builtin_shufflevector( \ 10119314564Sdim (__v16s##T2)Vec512, \ 10120314564Sdim (__v16s##T2)Vec512, \ 10121314564Sdim 0, 1, 2, 3, -1, -1, -1, -1, \ 10122314564Sdim -1, -1, -1, -1, -1, -1, -1, -1), \ 10123314564Sdim (__m512##T1)__builtin_shufflevector( \ 10124314564Sdim (__v16s##T2)Vec512, \ 10125314564Sdim (__v16s##T2)Vec512, \ 10126314564Sdim 4, 5, 6, 7, -1, -1, -1, -1, \ 10127314564Sdim -1, -1, -1, -1, -1, -1, -1, -1)); \ 10128314564Sdim Vec512 = _mm512_##IntrinName( \ 10129314564Sdim (__m512##T1)__builtin_shufflevector( \ 10130314564Sdim (__v16s##T2)Vec512, \ 10131314564Sdim (__v16s##T2)Vec512, \ 10132314564Sdim 0, 1, -1, -1, -1, -1, -1, -1, \ 10133314564Sdim -1, -1, -1, -1, -1, -1, -1, -1), \ 10134314564Sdim (__m512##T1)__builtin_shufflevector( \ 10135314564Sdim (__v16s##T2)Vec512, \ 10136314564Sdim (__v16s##T2)Vec512, \ 10137314564Sdim 2, 3, -1, -1, -1, -1, -1, -1, \ 10138314564Sdim -1, -1, -1, -1, -1, -1, -1, -1)); \ 10139314564Sdim Vec512 = _mm512_##IntrinName( \ 10140314564Sdim 
(__m512##T1)__builtin_shufflevector( \ 10141314564Sdim (__v16s##T2)Vec512, \ 10142314564Sdim (__v16s##T2)Vec512, \ 10143314564Sdim 0, -1, -1, -1, -1, -1, -1, -1, \ 10144314564Sdim -1, -1, -1, -1, -1, -1, -1, -1), \ 10145314564Sdim (__m512##T1)__builtin_shufflevector( \ 10146314564Sdim (__v16s##T2)Vec512, \ 10147314564Sdim (__v16s##T2)Vec512, \ 10148314564Sdim 1, -1, -1, -1, -1, -1, -1, -1, \ 10149314564Sdim -1, -1, -1, -1, -1, -1, -1, -1)); \ 10150314564Sdim return Vec512[0]; \ 10151314564Sdim }) 10152314564Sdim 10153314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a) { 10154314564Sdim _mm512_reduce_maxMin_32bit(a, max_epi32, i, i); 10155314564Sdim} 10156314564Sdim 10157314564Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 10158314564Sdim_mm512_reduce_max_epu32(__m512i a) { 10159314564Sdim _mm512_reduce_maxMin_32bit(a, max_epu32, i, i); 10160314564Sdim} 10161314564Sdim 10162314564Sdimstatic __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a) { 10163314564Sdim _mm512_reduce_maxMin_32bit(a, max_ps, , f); 10164314564Sdim} 10165314564Sdim 10166314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a) { 10167314564Sdim _mm512_reduce_maxMin_32bit(a, min_epi32, i, i); 10168314564Sdim} 10169314564Sdim 10170314564Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 10171314564Sdim_mm512_reduce_min_epu32(__m512i a) { 10172314564Sdim _mm512_reduce_maxMin_32bit(a, min_epu32, i, i); 10173314564Sdim} 10174314564Sdim 10175314564Sdimstatic __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a) { 10176314564Sdim _mm512_reduce_maxMin_32bit(a, min_ps, , f); 10177314564Sdim} 10178314564Sdim 10179314564Sdim// Vec512 - Vector with size 512. 
10180314564Sdim// Vec512Neutral - A 512 length vector with elements set to the identity element 10181314564Sdim// Identity element: {max_epi,0x80000000} 10182314564Sdim// {max_epu,0x00000000} 10183314564Sdim// {max_ps, 0xFF800000} 10184314564Sdim// {min_epi,0x7FFFFFFF} 10185314564Sdim// {min_epu,0xFFFFFFFF} 10186314564Sdim// {min_ps, 0x7F800000} 10187314564Sdim// 10188314564Sdim// IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example: 10189314564Sdim// __mm512_max_epi32 10190314564Sdim// T1 - Can get 'i' for int and ' ' .[__m512{i|}] 10191314564Sdim// T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}] 10192314564Sdim// T3 - Can get 'q' q word and 'pd' for packed double. 10193314564Sdim// [__builtin_ia32_select{q|pd}_512] 10194314564Sdim// Mask - Intrinsic Mask 10195314564Sdim 10196314564Sdim#define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \ 10197314564Sdim T2, T3, Mask) \ 10198314564Sdim __extension__({ \ 10199314564Sdim Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \ 10200314564Sdim (__mmask16)Mask, \ 10201314564Sdim (__v16s##T2)Vec512, \ 10202314564Sdim (__v16s##T2)Vec512Neutral); \ 10203314564Sdim _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2); \ 10204314564Sdim }) 10205314564Sdim 10206314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 10207314564Sdim_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) { 10208314564Sdim _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32, 10209314564Sdim i, i, d, __M); 10210314564Sdim} 10211314564Sdim 10212314564Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 10213314564Sdim_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) { 10214314564Sdim _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32, 10215314564Sdim i, i, d, __M); 10216314564Sdim} 10217314564Sdim 10218314564Sdimstatic __inline__ float __DEFAULT_FN_ATTRS 10219314564Sdim_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) { 
10220314564Sdim _mm512_mask_reduce_maxMin_32bit(__V,-_mm512_set1_ps(__builtin_inff()), max_ps, , f, 10221314564Sdim ps, __M); 10222314564Sdim} 10223314564Sdim 10224314564Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 10225314564Sdim_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) { 10226314564Sdim _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32, 10227314564Sdim i, i, d, __M); 10228314564Sdim} 10229314564Sdim 10230314564Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 10231314564Sdim_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) { 10232314564Sdim _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32, 10233314564Sdim i, i, d, __M); 10234314564Sdim} 10235314564Sdim 10236314564Sdimstatic __inline__ float __DEFAULT_FN_ATTRS 10237314564Sdim_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) { 10238314564Sdim _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), min_ps, , f, 10239314564Sdim ps, __M); 10240314564Sdim} 10241314564Sdim 10242288943Sdim#undef __DEFAULT_FN_ATTRS 10243288943Sdim 10244277325Sdim#endif // __AVX512FINTRIN_H 10245