1296417Sdim/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------=== 2277325Sdim * 3353358Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim * See https://llvm.org/LICENSE.txt for license information. 5353358Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6277325Sdim * 7277325Sdim *===-----------------------------------------------------------------------=== 8277325Sdim */ 9277325Sdim#ifndef __IMMINTRIN_H 10277325Sdim#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." 11277325Sdim#endif 12277325Sdim 13277325Sdim#ifndef __AVX512FINTRIN_H 14277325Sdim#define __AVX512FINTRIN_H 15277325Sdim 16309124Sdimtypedef char __v64qi __attribute__((__vector_size__(64))); 17309124Sdimtypedef short __v32hi __attribute__((__vector_size__(64))); 18277325Sdimtypedef double __v8df __attribute__((__vector_size__(64))); 19277325Sdimtypedef float __v16sf __attribute__((__vector_size__(64))); 20277325Sdimtypedef long long __v8di __attribute__((__vector_size__(64))); 21277325Sdimtypedef int __v16si __attribute__((__vector_size__(64))); 22277325Sdim 23309124Sdim/* Unsigned types */ 24309124Sdimtypedef unsigned char __v64qu __attribute__((__vector_size__(64))); 25309124Sdimtypedef unsigned short __v32hu __attribute__((__vector_size__(64))); 26309124Sdimtypedef unsigned long long __v8du __attribute__((__vector_size__(64))); 27309124Sdimtypedef unsigned int __v16su __attribute__((__vector_size__(64))); 28309124Sdim 29353358Sdimtypedef float __m512 __attribute__((__vector_size__(64), __aligned__(64))); 30353358Sdimtypedef double __m512d __attribute__((__vector_size__(64), __aligned__(64))); 31353358Sdimtypedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64))); 32277325Sdim 33353358Sdimtypedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1))); 34353358Sdimtypedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1))); 
/* 512-bit integer vector declared with 1-byte alignment. */
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));

/* Write-mask types: one bit per vector element (8 or 16 elements). */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;

/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04

/* Constants for integer comparison predicates */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;

/*
 * Four-letter permutation selectors.  Each letter contributes two bits
 * (A = 0, B = 1, C = 2, D = 3); the leftmost letter occupies the two most
 * significant bits, so _MM_PERM_DCBA == 0xE4 and _MM_PERM_AAAA == 0x00.
 */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;

150309124Sdimtypedef enum 151309124Sdim{ 152309124Sdim _MM_MANT_NORM_1_2, /* interval [1, 2) */ 153309124Sdim _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */ 154309124Sdim _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */ 155309124Sdim _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */ 156309124Sdim} _MM_MANTISSA_NORM_ENUM; 157309124Sdim 158309124Sdimtypedef enum 159309124Sdim{ 160309124Sdim _MM_MANT_SIGN_src, /* sign = sign(SRC) */ 161309124Sdim _MM_MANT_SIGN_zero, /* sign = 0 */ 162309124Sdim _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */ 163309124Sdim} _MM_MANTISSA_SIGN_ENUM; 164309124Sdim 165288943Sdim/* Define the default attributes for the functions in this file. */ 166341825Sdim#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512))) 167341825Sdim#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128))) 168344779Sdim#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) 169288943Sdim 170277325Sdim/* Create vectors with repeated elements */ 171277325Sdim 172341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 173277325Sdim_mm512_setzero_si512(void) 174277325Sdim{ 175341825Sdim return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; 176277325Sdim} 177277325Sdim 178309124Sdim#define _mm512_setzero_epi32 _mm512_setzero_si512 179309124Sdim 180341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 181309124Sdim_mm512_undefined_pd(void) 182296417Sdim{ 183296417Sdim return (__m512d)__builtin_ia32_undef512(); 184296417Sdim} 185296417Sdim 186341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 187309124Sdim_mm512_undefined(void) 188296417Sdim{ 189296417Sdim return (__m512)__builtin_ia32_undef512(); 190296417Sdim} 191296417Sdim 192341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 193309124Sdim_mm512_undefined_ps(void) 194296417Sdim{ 195296417Sdim return 
(__m512)__builtin_ia32_undef512(); 196296417Sdim} 197296417Sdim 198341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 199309124Sdim_mm512_undefined_epi32(void) 200296417Sdim{ 201296417Sdim return (__m512i)__builtin_ia32_undef512(); 202296417Sdim} 203296417Sdim 204341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 205309124Sdim_mm512_broadcastd_epi32 (__m128i __A) 206309124Sdim{ 207341825Sdim return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A, 208309124Sdim 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 209309124Sdim} 210309124Sdim 211341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 212309124Sdim_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) 213309124Sdim{ 214309124Sdim return (__m512i)__builtin_ia32_selectd_512(__M, 215309124Sdim (__v16si) _mm512_broadcastd_epi32(__A), 216309124Sdim (__v16si) __O); 217309124Sdim} 218309124Sdim 219341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 220309124Sdim_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) 221309124Sdim{ 222309124Sdim return (__m512i)__builtin_ia32_selectd_512(__M, 223309124Sdim (__v16si) _mm512_broadcastd_epi32(__A), 224309124Sdim (__v16si) _mm512_setzero_si512()); 225309124Sdim} 226309124Sdim 227341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 228309124Sdim_mm512_broadcastq_epi64 (__m128i __A) 229309124Sdim{ 230341825Sdim return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A, 231309124Sdim 0, 0, 0, 0, 0, 0, 0, 0); 232309124Sdim} 233309124Sdim 234341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 235309124Sdim_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) 236309124Sdim{ 237309124Sdim return (__m512i)__builtin_ia32_selectq_512(__M, 238309124Sdim (__v8di) _mm512_broadcastq_epi64(__A), 239309124Sdim (__v8di) __O); 240309124Sdim 241309124Sdim} 242309124Sdim 243341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 244309124Sdim_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i 
__A) 245309124Sdim{ 246309124Sdim return (__m512i)__builtin_ia32_selectq_512(__M, 247309124Sdim (__v8di) _mm512_broadcastq_epi64(__A), 248309124Sdim (__v8di) _mm512_setzero_si512()); 249309124Sdim} 250309124Sdim 251277325Sdim 252341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 253277325Sdim_mm512_setzero_ps(void) 254277325Sdim{ 255341825Sdim return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 256341825Sdim 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 257277325Sdim} 258309124Sdim 259309124Sdim#define _mm512_setzero _mm512_setzero_ps 260309124Sdim 261341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 262277325Sdim_mm512_setzero_pd(void) 263277325Sdim{ 264341825Sdim return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 265277325Sdim} 266277325Sdim 267341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 268277325Sdim_mm512_set1_ps(float __w) 269277325Sdim{ 270341825Sdim return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, 271341825Sdim __w, __w, __w, __w, __w, __w, __w, __w }; 272277325Sdim} 273277325Sdim 274341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 275277325Sdim_mm512_set1_pd(double __w) 276277325Sdim{ 277341825Sdim return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; 278277325Sdim} 279277325Sdim 280341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 281309124Sdim_mm512_set1_epi8(char __w) 282309124Sdim{ 283341825Sdim return __extension__ (__m512i)(__v64qi){ 284341825Sdim __w, __w, __w, __w, __w, __w, __w, __w, 285341825Sdim __w, __w, __w, __w, __w, __w, __w, __w, 286341825Sdim __w, __w, __w, __w, __w, __w, __w, __w, 287341825Sdim __w, __w, __w, __w, __w, __w, __w, __w, 288341825Sdim __w, __w, __w, __w, __w, __w, __w, __w, 289341825Sdim __w, __w, __w, __w, __w, __w, __w, __w, 290341825Sdim __w, __w, __w, __w, __w, __w, __w, __w, 291341825Sdim __w, __w, __w, __w, __w, __w, __w, __w }; 292309124Sdim} 293309124Sdim 294341825Sdimstatic __inline __m512i 
__DEFAULT_FN_ATTRS512 295309124Sdim_mm512_set1_epi16(short __w) 296309124Sdim{ 297341825Sdim return __extension__ (__m512i)(__v32hi){ 298341825Sdim __w, __w, __w, __w, __w, __w, __w, __w, 299341825Sdim __w, __w, __w, __w, __w, __w, __w, __w, 300341825Sdim __w, __w, __w, __w, __w, __w, __w, __w, 301341825Sdim __w, __w, __w, __w, __w, __w, __w, __w }; 302309124Sdim} 303309124Sdim 304341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 305277325Sdim_mm512_set1_epi32(int __s) 306277325Sdim{ 307341825Sdim return __extension__ (__m512i)(__v16si){ 308341825Sdim __s, __s, __s, __s, __s, __s, __s, __s, 309341825Sdim __s, __s, __s, __s, __s, __s, __s, __s }; 310277325Sdim} 311277325Sdim 312341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 313341825Sdim_mm512_maskz_set1_epi32(__mmask16 __M, int __A) 314327952Sdim{ 315341825Sdim return (__m512i)__builtin_ia32_selectd_512(__M, 316327952Sdim (__v16si)_mm512_set1_epi32(__A), 317327952Sdim (__v16si)_mm512_setzero_si512()); 318327952Sdim} 319327952Sdim 320341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 321277325Sdim_mm512_set1_epi64(long long __d) 322277325Sdim{ 323341825Sdim return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; 324277325Sdim} 325277325Sdim 326341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 327327952Sdim_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) 328327952Sdim{ 329327952Sdim return (__m512i)__builtin_ia32_selectq_512(__M, 330327952Sdim (__v8di)_mm512_set1_epi64(__A), 331327952Sdim (__v8di)_mm512_setzero_si512()); 332327952Sdim} 333327952Sdim 334341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 335309124Sdim_mm512_broadcastss_ps(__m128 __A) 336277325Sdim{ 337341825Sdim return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A, 338309124Sdim 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 339277325Sdim} 340277325Sdim 341341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 342309124Sdim_mm512_set4_epi32 (int __A, int __B, int __C, int __D) 
343309124Sdim{ 344341825Sdim return __extension__ (__m512i)(__v16si) 345309124Sdim { __D, __C, __B, __A, __D, __C, __B, __A, 346309124Sdim __D, __C, __B, __A, __D, __C, __B, __A }; 347309124Sdim} 348309124Sdim 349341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 350309124Sdim_mm512_set4_epi64 (long long __A, long long __B, long long __C, 351309124Sdim long long __D) 352309124Sdim{ 353341825Sdim return __extension__ (__m512i) (__v8di) 354309124Sdim { __D, __C, __B, __A, __D, __C, __B, __A }; 355309124Sdim} 356309124Sdim 357341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 358309124Sdim_mm512_set4_pd (double __A, double __B, double __C, double __D) 359309124Sdim{ 360341825Sdim return __extension__ (__m512d) 361309124Sdim { __D, __C, __B, __A, __D, __C, __B, __A }; 362309124Sdim} 363309124Sdim 364341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 365309124Sdim_mm512_set4_ps (float __A, float __B, float __C, float __D) 366309124Sdim{ 367341825Sdim return __extension__ (__m512) 368309124Sdim { __D, __C, __B, __A, __D, __C, __B, __A, 369309124Sdim __D, __C, __B, __A, __D, __C, __B, __A }; 370309124Sdim} 371309124Sdim 372309124Sdim#define _mm512_setr4_epi32(e0,e1,e2,e3) \ 373309124Sdim _mm512_set4_epi32((e3),(e2),(e1),(e0)) 374309124Sdim 375309124Sdim#define _mm512_setr4_epi64(e0,e1,e2,e3) \ 376309124Sdim _mm512_set4_epi64((e3),(e2),(e1),(e0)) 377309124Sdim 378309124Sdim#define _mm512_setr4_pd(e0,e1,e2,e3) \ 379309124Sdim _mm512_set4_pd((e3),(e2),(e1),(e0)) 380309124Sdim 381309124Sdim#define _mm512_setr4_ps(e0,e1,e2,e3) \ 382309124Sdim _mm512_set4_ps((e3),(e2),(e1),(e0)) 383309124Sdim 384341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 385309124Sdim_mm512_broadcastsd_pd(__m128d __A) 386277325Sdim{ 387341825Sdim return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A, 388309124Sdim 0, 0, 0, 0, 0, 0, 0, 0); 389277325Sdim} 390277325Sdim 391277325Sdim/* Cast between vector types */ 392277325Sdim 393341825Sdimstatic __inline __m512d 
__DEFAULT_FN_ATTRS512 394277325Sdim_mm512_castpd256_pd512(__m256d __a) 395277325Sdim{ 396277325Sdim return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); 397277325Sdim} 398277325Sdim 399341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 400277325Sdim_mm512_castps256_ps512(__m256 __a) 401277325Sdim{ 402277325Sdim return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 403277325Sdim -1, -1, -1, -1, -1, -1, -1, -1); 404277325Sdim} 405277325Sdim 406341825Sdimstatic __inline __m128d __DEFAULT_FN_ATTRS512 407277325Sdim_mm512_castpd512_pd128(__m512d __a) 408277325Sdim{ 409277325Sdim return __builtin_shufflevector(__a, __a, 0, 1); 410277325Sdim} 411277325Sdim 412341825Sdimstatic __inline __m256d __DEFAULT_FN_ATTRS512 413309124Sdim_mm512_castpd512_pd256 (__m512d __A) 414309124Sdim{ 415309124Sdim return __builtin_shufflevector(__A, __A, 0, 1, 2, 3); 416309124Sdim} 417309124Sdim 418341825Sdimstatic __inline __m128 __DEFAULT_FN_ATTRS512 419277325Sdim_mm512_castps512_ps128(__m512 __a) 420277325Sdim{ 421277325Sdim return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); 422277325Sdim} 423277325Sdim 424341825Sdimstatic __inline __m256 __DEFAULT_FN_ATTRS512 425309124Sdim_mm512_castps512_ps256 (__m512 __A) 426309124Sdim{ 427309124Sdim return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7); 428309124Sdim} 429309124Sdim 430341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 431309124Sdim_mm512_castpd_ps (__m512d __A) 432309124Sdim{ 433309124Sdim return (__m512) (__A); 434309124Sdim} 435309124Sdim 436341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 437309124Sdim_mm512_castpd_si512 (__m512d __A) 438309124Sdim{ 439309124Sdim return (__m512i) (__A); 440309124Sdim} 441309124Sdim 442341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 443309124Sdim_mm512_castpd128_pd512 (__m128d __A) 444309124Sdim{ 445309124Sdim return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); 446309124Sdim} 447309124Sdim 448341825Sdimstatic 
__inline __m512d __DEFAULT_FN_ATTRS512 449309124Sdim_mm512_castps_pd (__m512 __A) 450309124Sdim{ 451309124Sdim return (__m512d) (__A); 452309124Sdim} 453309124Sdim 454341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 455309124Sdim_mm512_castps_si512 (__m512 __A) 456309124Sdim{ 457309124Sdim return (__m512i) (__A); 458309124Sdim} 459309124Sdim 460341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 461309124Sdim_mm512_castps128_ps512 (__m128 __A) 462309124Sdim{ 463309124Sdim return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); 464309124Sdim} 465309124Sdim 466341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 467309124Sdim_mm512_castsi128_si512 (__m128i __A) 468309124Sdim{ 469309124Sdim return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); 470309124Sdim} 471309124Sdim 472341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 473309124Sdim_mm512_castsi256_si512 (__m256i __A) 474309124Sdim{ 475309124Sdim return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1); 476309124Sdim} 477309124Sdim 478341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 479309124Sdim_mm512_castsi512_ps (__m512i __A) 480309124Sdim{ 481309124Sdim return (__m512) (__A); 482309124Sdim} 483309124Sdim 484341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 485309124Sdim_mm512_castsi512_pd (__m512i __A) 486309124Sdim{ 487309124Sdim return (__m512d) (__A); 488309124Sdim} 489309124Sdim 490341825Sdimstatic __inline __m128i __DEFAULT_FN_ATTRS512 491309124Sdim_mm512_castsi512_si128 (__m512i __A) 492309124Sdim{ 493309124Sdim return (__m128i)__builtin_shufflevector(__A, __A , 0, 1); 494309124Sdim} 495309124Sdim 496341825Sdimstatic __inline __m256i __DEFAULT_FN_ATTRS512 497309124Sdim_mm512_castsi512_si256 (__m512i __A) 498309124Sdim{ 499309124Sdim return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3); 500309124Sdim} 501309124Sdim 502344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 
503314564Sdim_mm512_int2mask(int __a) 504314564Sdim{ 505314564Sdim return (__mmask16)__a; 506314564Sdim} 507314564Sdim 508344779Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 509314564Sdim_mm512_mask2int(__mmask16 __a) 510314564Sdim{ 511314564Sdim return (int)__a; 512314564Sdim} 513314564Sdim 514341825Sdim/// Constructs a 512-bit floating-point vector of [8 x double] from a 515321369Sdim/// 128-bit floating-point vector of [2 x double]. The lower 128 bits 516321369Sdim/// contain the value of the source vector. The upper 384 bits are set 517321369Sdim/// to zero. 518321369Sdim/// 519321369Sdim/// \headerfile <x86intrin.h> 520321369Sdim/// 521321369Sdim/// This intrinsic has no corresponding instruction. 522321369Sdim/// 523321369Sdim/// \param __a 524321369Sdim/// A 128-bit vector of [2 x double]. 525321369Sdim/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits 526321369Sdim/// contain the value of the parameter. The upper 384 bits are set to zero. 527341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 528321369Sdim_mm512_zextpd128_pd512(__m128d __a) 529321369Sdim{ 530321369Sdim return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3); 531321369Sdim} 532321369Sdim 533341825Sdim/// Constructs a 512-bit floating-point vector of [8 x double] from a 534321369Sdim/// 256-bit floating-point vector of [4 x double]. The lower 256 bits 535321369Sdim/// contain the value of the source vector. The upper 256 bits are set 536321369Sdim/// to zero. 537321369Sdim/// 538321369Sdim/// \headerfile <x86intrin.h> 539321369Sdim/// 540321369Sdim/// This intrinsic has no corresponding instruction. 541321369Sdim/// 542321369Sdim/// \param __a 543321369Sdim/// A 256-bit vector of [4 x double]. 544321369Sdim/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits 545321369Sdim/// contain the value of the parameter. The upper 256 bits are set to zero. 
546341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 547321369Sdim_mm512_zextpd256_pd512(__m256d __a) 548321369Sdim{ 549321369Sdim return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7); 550321369Sdim} 551321369Sdim 552341825Sdim/// Constructs a 512-bit floating-point vector of [16 x float] from a 553321369Sdim/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain 554321369Sdim/// the value of the source vector. The upper 384 bits are set to zero. 555321369Sdim/// 556321369Sdim/// \headerfile <x86intrin.h> 557321369Sdim/// 558321369Sdim/// This intrinsic has no corresponding instruction. 559321369Sdim/// 560321369Sdim/// \param __a 561321369Sdim/// A 128-bit vector of [4 x float]. 562321369Sdim/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits 563321369Sdim/// contain the value of the parameter. The upper 384 bits are set to zero. 564341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 565321369Sdim_mm512_zextps128_ps512(__m128 __a) 566321369Sdim{ 567321369Sdim return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7); 568321369Sdim} 569321369Sdim 570341825Sdim/// Constructs a 512-bit floating-point vector of [16 x float] from a 571321369Sdim/// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain 572321369Sdim/// the value of the source vector. The upper 256 bits are set to zero. 573321369Sdim/// 574321369Sdim/// \headerfile <x86intrin.h> 575321369Sdim/// 576321369Sdim/// This intrinsic has no corresponding instruction. 577321369Sdim/// 578321369Sdim/// \param __a 579321369Sdim/// A 256-bit vector of [8 x float]. 580321369Sdim/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits 581321369Sdim/// contain the value of the parameter. The upper 256 bits are set to zero. 
582341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 583321369Sdim_mm512_zextps256_ps512(__m256 __a) 584321369Sdim{ 585321369Sdim return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 586321369Sdim} 587321369Sdim 588341825Sdim/// Constructs a 512-bit integer vector from a 128-bit integer vector. 589321369Sdim/// The lower 128 bits contain the value of the source vector. The upper 590321369Sdim/// 384 bits are set to zero. 591321369Sdim/// 592321369Sdim/// \headerfile <x86intrin.h> 593321369Sdim/// 594321369Sdim/// This intrinsic has no corresponding instruction. 595321369Sdim/// 596321369Sdim/// \param __a 597321369Sdim/// A 128-bit integer vector. 598321369Sdim/// \returns A 512-bit integer vector. The lower 128 bits contain the value of 599321369Sdim/// the parameter. The upper 384 bits are set to zero. 600341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 601321369Sdim_mm512_zextsi128_si512(__m128i __a) 602321369Sdim{ 603321369Sdim return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3); 604321369Sdim} 605321369Sdim 606341825Sdim/// Constructs a 512-bit integer vector from a 256-bit integer vector. 607321369Sdim/// The lower 256 bits contain the value of the source vector. The upper 608321369Sdim/// 256 bits are set to zero. 609321369Sdim/// 610321369Sdim/// \headerfile <x86intrin.h> 611321369Sdim/// 612321369Sdim/// This intrinsic has no corresponding instruction. 613321369Sdim/// 614321369Sdim/// \param __a 615321369Sdim/// A 256-bit integer vector. 616321369Sdim/// \returns A 512-bit integer vector. The lower 256 bits contain the value of 617321369Sdim/// the parameter. The upper 256 bits are set to zero. 
618341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 619321369Sdim_mm512_zextsi256_si512(__m256i __a) 620321369Sdim{ 621321369Sdim return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7); 622321369Sdim} 623321369Sdim 624288943Sdim/* Bitwise operators */ 625341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 626288943Sdim_mm512_and_epi32(__m512i __a, __m512i __b) 627288943Sdim{ 628309124Sdim return (__m512i)((__v16su)__a & (__v16su)__b); 629288943Sdim} 630288943Sdim 631341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 632288943Sdim_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 633288943Sdim{ 634309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, 635309124Sdim (__v16si) _mm512_and_epi32(__a, __b), 636309124Sdim (__v16si) __src); 637288943Sdim} 638309124Sdim 639341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 640288943Sdim_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) 641288943Sdim{ 642309124Sdim return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (), 643309124Sdim __k, __a, __b); 644288943Sdim} 645288943Sdim 646341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 647288943Sdim_mm512_and_epi64(__m512i __a, __m512i __b) 648288943Sdim{ 649309124Sdim return (__m512i)((__v8du)__a & (__v8du)__b); 650288943Sdim} 651288943Sdim 652341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 653288943Sdim_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 654288943Sdim{ 655309124Sdim return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k, 656309124Sdim (__v8di) _mm512_and_epi64(__a, __b), 657309124Sdim (__v8di) __src); 658288943Sdim} 659309124Sdim 660341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 661288943Sdim_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) 662288943Sdim{ 663309124Sdim return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (), 664309124Sdim __k, __a, 
__b); 665288943Sdim} 666288943Sdim 667341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 668309124Sdim_mm512_andnot_si512 (__m512i __A, __m512i __B) 669309124Sdim{ 670341825Sdim return (__m512i)(~(__v8du)__A & (__v8du)__B); 671309124Sdim} 672309124Sdim 673341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 674288943Sdim_mm512_andnot_epi32 (__m512i __A, __m512i __B) 675288943Sdim{ 676341825Sdim return (__m512i)(~(__v16su)__A & (__v16su)__B); 677288943Sdim} 678288943Sdim 679341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 680309124Sdim_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 681288943Sdim{ 682309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 683309124Sdim (__v16si)_mm512_andnot_epi32(__A, __B), 684309124Sdim (__v16si)__W); 685288943Sdim} 686288943Sdim 687341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 688309124Sdim_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B) 689288943Sdim{ 690309124Sdim return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(), 691309124Sdim __U, __A, __B); 692288943Sdim} 693288943Sdim 694341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 695309124Sdim_mm512_andnot_epi64(__m512i __A, __m512i __B) 696288943Sdim{ 697341825Sdim return (__m512i)(~(__v8du)__A & (__v8du)__B); 698288943Sdim} 699288943Sdim 700341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 701309124Sdim_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 702288943Sdim{ 703309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 704309124Sdim (__v8di)_mm512_andnot_epi64(__A, __B), 705309124Sdim (__v8di)__W); 706288943Sdim} 707288943Sdim 708341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 709309124Sdim_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B) 710288943Sdim{ 711309124Sdim return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(), 712309124Sdim __U, __A, __B); 713288943Sdim} 714309124Sdim 
715341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 716288943Sdim_mm512_or_epi32(__m512i __a, __m512i __b) 717288943Sdim{ 718309124Sdim return (__m512i)((__v16su)__a | (__v16su)__b); 719288943Sdim} 720288943Sdim 721341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 722288943Sdim_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 723288943Sdim{ 724309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, 725309124Sdim (__v16si)_mm512_or_epi32(__a, __b), 726309124Sdim (__v16si)__src); 727288943Sdim} 728309124Sdim 729341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 730288943Sdim_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) 731288943Sdim{ 732309124Sdim return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b); 733288943Sdim} 734288943Sdim 735341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 736288943Sdim_mm512_or_epi64(__m512i __a, __m512i __b) 737288943Sdim{ 738309124Sdim return (__m512i)((__v8du)__a | (__v8du)__b); 739288943Sdim} 740288943Sdim 741341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 742288943Sdim_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 743288943Sdim{ 744309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k, 745309124Sdim (__v8di)_mm512_or_epi64(__a, __b), 746309124Sdim (__v8di)__src); 747288943Sdim} 748309124Sdim 749341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 750288943Sdim_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) 751288943Sdim{ 752309124Sdim return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b); 753288943Sdim} 754288943Sdim 755341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 756288943Sdim_mm512_xor_epi32(__m512i __a, __m512i __b) 757288943Sdim{ 758309124Sdim return (__m512i)((__v16su)__a ^ (__v16su)__b); 759288943Sdim} 760288943Sdim 761341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 
762288943Sdim_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 763288943Sdim{ 764309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, 765309124Sdim (__v16si)_mm512_xor_epi32(__a, __b), 766309124Sdim (__v16si)__src); 767288943Sdim} 768309124Sdim 769341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 770288943Sdim_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) 771288943Sdim{ 772309124Sdim return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b); 773288943Sdim} 774288943Sdim 775341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 776288943Sdim_mm512_xor_epi64(__m512i __a, __m512i __b) 777288943Sdim{ 778309124Sdim return (__m512i)((__v8du)__a ^ (__v8du)__b); 779288943Sdim} 780288943Sdim 781341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 782288943Sdim_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 783288943Sdim{ 784309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k, 785309124Sdim (__v8di)_mm512_xor_epi64(__a, __b), 786309124Sdim (__v8di)__src); 787288943Sdim} 788309124Sdim 789341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 790288943Sdim_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) 791288943Sdim{ 792309124Sdim return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b); 793288943Sdim} 794288943Sdim 795341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 796288943Sdim_mm512_and_si512(__m512i __a, __m512i __b) 797288943Sdim{ 798309124Sdim return (__m512i)((__v8du)__a & (__v8du)__b); 799288943Sdim} 800288943Sdim 801341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 802288943Sdim_mm512_or_si512(__m512i __a, __m512i __b) 803288943Sdim{ 804309124Sdim return (__m512i)((__v8du)__a | (__v8du)__b); 805288943Sdim} 806288943Sdim 807341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 808288943Sdim_mm512_xor_si512(__m512i __a, __m512i __b) 809288943Sdim{ 810309124Sdim return 
(__m512i)((__v8du)__a ^ (__v8du)__b); 811288943Sdim} 812309124Sdim 813277325Sdim/* Arithmetic */ 814277325Sdim 815341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 816277325Sdim_mm512_add_pd(__m512d __a, __m512d __b) 817277325Sdim{ 818309124Sdim return (__m512d)((__v8df)__a + (__v8df)__b); 819277325Sdim} 820277325Sdim 821341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 822277325Sdim_mm512_add_ps(__m512 __a, __m512 __b) 823277325Sdim{ 824309124Sdim return (__m512)((__v16sf)__a + (__v16sf)__b); 825277325Sdim} 826277325Sdim 827341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 828277325Sdim_mm512_mul_pd(__m512d __a, __m512d __b) 829277325Sdim{ 830309124Sdim return (__m512d)((__v8df)__a * (__v8df)__b); 831277325Sdim} 832277325Sdim 833341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 834277325Sdim_mm512_mul_ps(__m512 __a, __m512 __b) 835277325Sdim{ 836309124Sdim return (__m512)((__v16sf)__a * (__v16sf)__b); 837277325Sdim} 838277325Sdim 839341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 840277325Sdim_mm512_sub_pd(__m512d __a, __m512d __b) 841277325Sdim{ 842309124Sdim return (__m512d)((__v8df)__a - (__v8df)__b); 843277325Sdim} 844277325Sdim 845341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 846277325Sdim_mm512_sub_ps(__m512 __a, __m512 __b) 847277325Sdim{ 848309124Sdim return (__m512)((__v16sf)__a - (__v16sf)__b); 849277325Sdim} 850277325Sdim 851341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 852288943Sdim_mm512_add_epi64 (__m512i __A, __m512i __B) 853288943Sdim{ 854309124Sdim return (__m512i) ((__v8du) __A + (__v8du) __B); 855288943Sdim} 856288943Sdim 857341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 858314564Sdim_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 859288943Sdim{ 860314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 861314564Sdim (__v8di)_mm512_add_epi64(__A, __B), 862314564Sdim (__v8di)__W); 863288943Sdim} 864288943Sdim 865341825Sdimstatic __inline__ 
__m512i __DEFAULT_FN_ATTRS512 866314564Sdim_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) 867288943Sdim{ 868314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 869314564Sdim (__v8di)_mm512_add_epi64(__A, __B), 870314564Sdim (__v8di)_mm512_setzero_si512()); 871288943Sdim} 872288943Sdim 873341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 874288943Sdim_mm512_sub_epi64 (__m512i __A, __m512i __B) 875288943Sdim{ 876309124Sdim return (__m512i) ((__v8du) __A - (__v8du) __B); 877288943Sdim} 878288943Sdim 879341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 880314564Sdim_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 881288943Sdim{ 882314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 883314564Sdim (__v8di)_mm512_sub_epi64(__A, __B), 884314564Sdim (__v8di)__W); 885288943Sdim} 886288943Sdim 887341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 888314564Sdim_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) 889288943Sdim{ 890314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 891314564Sdim (__v8di)_mm512_sub_epi64(__A, __B), 892314564Sdim (__v8di)_mm512_setzero_si512()); 893288943Sdim} 894288943Sdim 895341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 896288943Sdim_mm512_add_epi32 (__m512i __A, __m512i __B) 897288943Sdim{ 898309124Sdim return (__m512i) ((__v16su) __A + (__v16su) __B); 899288943Sdim} 900288943Sdim 901341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 902314564Sdim_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 903288943Sdim{ 904314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 905314564Sdim (__v16si)_mm512_add_epi32(__A, __B), 906314564Sdim (__v16si)__W); 907288943Sdim} 908288943Sdim 909341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 910288943Sdim_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 911288943Sdim{ 912314564Sdim return 
(__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 913314564Sdim (__v16si)_mm512_add_epi32(__A, __B), 914314564Sdim (__v16si)_mm512_setzero_si512()); 915288943Sdim} 916288943Sdim 917341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 918288943Sdim_mm512_sub_epi32 (__m512i __A, __m512i __B) 919288943Sdim{ 920309124Sdim return (__m512i) ((__v16su) __A - (__v16su) __B); 921288943Sdim} 922288943Sdim 923341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 924314564Sdim_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 925288943Sdim{ 926314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 927314564Sdim (__v16si)_mm512_sub_epi32(__A, __B), 928314564Sdim (__v16si)__W); 929288943Sdim} 930288943Sdim 931341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 932314564Sdim_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) 933288943Sdim{ 934314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 935314564Sdim (__v16si)_mm512_sub_epi32(__A, __B), 936314564Sdim (__v16si)_mm512_setzero_si512()); 937288943Sdim} 938288943Sdim 939341825Sdim#define _mm512_max_round_pd(A, B, R) \ 940341825Sdim (__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \ 941341825Sdim (__v8df)(__m512d)(B), (int)(R)) 942309124Sdim 943341825Sdim#define _mm512_mask_max_round_pd(W, U, A, B, R) \ 944341825Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 945341825Sdim (__v8df)_mm512_max_round_pd((A), (B), (R)), \ 946341825Sdim (__v8df)(W)) 947309124Sdim 948341825Sdim#define _mm512_maskz_max_round_pd(U, A, B, R) \ 949341825Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 950341825Sdim (__v8df)_mm512_max_round_pd((A), (B), (R)), \ 951341825Sdim (__v8df)_mm512_setzero_pd()) 952309124Sdim 953341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 954277325Sdim_mm512_max_pd(__m512d __A, __m512d __B) 955277325Sdim{ 956341825Sdim return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B, 957341825Sdim 
_MM_FROUND_CUR_DIRECTION); 958277325Sdim} 959277325Sdim 960341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 961309124Sdim_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 962309124Sdim{ 963341825Sdim return (__m512d)__builtin_ia32_selectpd_512(__U, 964341825Sdim (__v8df)_mm512_max_pd(__A, __B), 965341825Sdim (__v8df)__W); 966309124Sdim} 967309124Sdim 968341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 969309124Sdim_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) 970309124Sdim{ 971341825Sdim return (__m512d)__builtin_ia32_selectpd_512(__U, 972341825Sdim (__v8df)_mm512_max_pd(__A, __B), 973341825Sdim (__v8df)_mm512_setzero_pd()); 974309124Sdim} 975309124Sdim 976341825Sdim#define _mm512_max_round_ps(A, B, R) \ 977341825Sdim (__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \ 978341825Sdim (__v16sf)(__m512)(B), (int)(R)) 979309124Sdim 980341825Sdim#define _mm512_mask_max_round_ps(W, U, A, B, R) \ 981341825Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 982341825Sdim (__v16sf)_mm512_max_round_ps((A), (B), (R)), \ 983341825Sdim (__v16sf)(W)) 984309124Sdim 985341825Sdim#define _mm512_maskz_max_round_ps(U, A, B, R) \ 986341825Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 987341825Sdim (__v16sf)_mm512_max_round_ps((A), (B), (R)), \ 988341825Sdim (__v16sf)_mm512_setzero_ps()) 989309124Sdim 990341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 991277325Sdim_mm512_max_ps(__m512 __A, __m512 __B) 992277325Sdim{ 993341825Sdim return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B, 994341825Sdim _MM_FROUND_CUR_DIRECTION); 995277325Sdim} 996277325Sdim 997341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 998309124Sdim_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 999309124Sdim{ 1000341825Sdim return (__m512)__builtin_ia32_selectps_512(__U, 1001341825Sdim (__v16sf)_mm512_max_ps(__A, __B), 1002341825Sdim (__v16sf)__W); 1003309124Sdim} 1004309124Sdim 
1005341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 1006309124Sdim_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) 1007309124Sdim{ 1008341825Sdim return (__m512)__builtin_ia32_selectps_512(__U, 1009341825Sdim (__v16sf)_mm512_max_ps(__A, __B), 1010341825Sdim (__v16sf)_mm512_setzero_ps()); 1011309124Sdim} 1012309124Sdim 1013341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 1014296417Sdim_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 1015309124Sdim return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, 1016296417Sdim (__v4sf) __B, 1017296417Sdim (__v4sf) __W, 1018296417Sdim (__mmask8) __U, 1019296417Sdim _MM_FROUND_CUR_DIRECTION); 1020296417Sdim} 1021296417Sdim 1022341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 1023296417Sdim_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { 1024309124Sdim return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, 1025296417Sdim (__v4sf) __B, 1026296417Sdim (__v4sf) _mm_setzero_ps (), 1027296417Sdim (__mmask8) __U, 1028296417Sdim _MM_FROUND_CUR_DIRECTION); 1029296417Sdim} 1030296417Sdim 1031341825Sdim#define _mm_max_round_ss(A, B, R) \ 1032309124Sdim (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 1033309124Sdim (__v4sf)(__m128)(B), \ 1034309124Sdim (__v4sf)_mm_setzero_ps(), \ 1035341825Sdim (__mmask8)-1, (int)(R)) 1036296417Sdim 1037341825Sdim#define _mm_mask_max_round_ss(W, U, A, B, R) \ 1038309124Sdim (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 1039309124Sdim (__v4sf)(__m128)(B), \ 1040309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 1041341825Sdim (int)(R)) 1042296417Sdim 1043341825Sdim#define _mm_maskz_max_round_ss(U, A, B, R) \ 1044309124Sdim (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 1045309124Sdim (__v4sf)(__m128)(B), \ 1046309124Sdim (__v4sf)_mm_setzero_ps(), \ 1047341825Sdim (__mmask8)(U), (int)(R)) 1048296417Sdim 1049341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 
1050296417Sdim_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 1051309124Sdim return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, 1052296417Sdim (__v2df) __B, 1053296417Sdim (__v2df) __W, 1054296417Sdim (__mmask8) __U, 1055296417Sdim _MM_FROUND_CUR_DIRECTION); 1056296417Sdim} 1057296417Sdim 1058341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 1059296417Sdim_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { 1060309124Sdim return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, 1061296417Sdim (__v2df) __B, 1062296417Sdim (__v2df) _mm_setzero_pd (), 1063296417Sdim (__mmask8) __U, 1064296417Sdim _MM_FROUND_CUR_DIRECTION); 1065296417Sdim} 1066296417Sdim 1067341825Sdim#define _mm_max_round_sd(A, B, R) \ 1068309124Sdim (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 1069309124Sdim (__v2df)(__m128d)(B), \ 1070309124Sdim (__v2df)_mm_setzero_pd(), \ 1071341825Sdim (__mmask8)-1, (int)(R)) 1072296417Sdim 1073341825Sdim#define _mm_mask_max_round_sd(W, U, A, B, R) \ 1074309124Sdim (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 1075309124Sdim (__v2df)(__m128d)(B), \ 1076309124Sdim (__v2df)(__m128d)(W), \ 1077341825Sdim (__mmask8)(U), (int)(R)) 1078296417Sdim 1079341825Sdim#define _mm_maskz_max_round_sd(U, A, B, R) \ 1080309124Sdim (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 1081309124Sdim (__v2df)(__m128d)(B), \ 1082309124Sdim (__v2df)_mm_setzero_pd(), \ 1083341825Sdim (__mmask8)(U), (int)(R)) 1084296417Sdim 1085277325Sdimstatic __inline __m512i 1086341825Sdim__DEFAULT_FN_ATTRS512 1087277325Sdim_mm512_max_epi32(__m512i __A, __m512i __B) 1088277325Sdim{ 1089341825Sdim return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B); 1090277325Sdim} 1091277325Sdim 1092341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1093309124Sdim_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1094309124Sdim{ 1095341825Sdim return 
(__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1096341825Sdim (__v16si)_mm512_max_epi32(__A, __B), 1097341825Sdim (__v16si)__W); 1098309124Sdim} 1099309124Sdim 1100341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1101309124Sdim_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 1102309124Sdim{ 1103341825Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1104341825Sdim (__v16si)_mm512_max_epi32(__A, __B), 1105341825Sdim (__v16si)_mm512_setzero_si512()); 1106309124Sdim} 1107309124Sdim 1108341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 1109277325Sdim_mm512_max_epu32(__m512i __A, __m512i __B) 1110277325Sdim{ 1111341825Sdim return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B); 1112277325Sdim} 1113277325Sdim 1114341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1115309124Sdim_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1116309124Sdim{ 1117341825Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1118341825Sdim (__v16si)_mm512_max_epu32(__A, __B), 1119341825Sdim (__v16si)__W); 1120309124Sdim} 1121309124Sdim 1122341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1123309124Sdim_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) 1124309124Sdim{ 1125341825Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1126341825Sdim (__v16si)_mm512_max_epu32(__A, __B), 1127341825Sdim (__v16si)_mm512_setzero_si512()); 1128309124Sdim} 1129309124Sdim 1130341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 1131277325Sdim_mm512_max_epi64(__m512i __A, __m512i __B) 1132277325Sdim{ 1133341825Sdim return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B); 1134277325Sdim} 1135277325Sdim 1136341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1137309124Sdim_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 1138309124Sdim{ 1139341825Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1140341825Sdim 
(__v8di)_mm512_max_epi64(__A, __B), 1141341825Sdim (__v8di)__W); 1142309124Sdim} 1143309124Sdim 1144341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1145309124Sdim_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) 1146309124Sdim{ 1147341825Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1148341825Sdim (__v8di)_mm512_max_epi64(__A, __B), 1149341825Sdim (__v8di)_mm512_setzero_si512()); 1150309124Sdim} 1151309124Sdim 1152341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 1153277325Sdim_mm512_max_epu64(__m512i __A, __m512i __B) 1154277325Sdim{ 1155341825Sdim return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B); 1156277325Sdim} 1157277325Sdim 1158341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1159309124Sdim_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 1160309124Sdim{ 1161341825Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1162341825Sdim (__v8di)_mm512_max_epu64(__A, __B), 1163341825Sdim (__v8di)__W); 1164309124Sdim} 1165309124Sdim 1166341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1167309124Sdim_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) 1168309124Sdim{ 1169341825Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1170341825Sdim (__v8di)_mm512_max_epu64(__A, __B), 1171341825Sdim (__v8di)_mm512_setzero_si512()); 1172309124Sdim} 1173309124Sdim 1174341825Sdim#define _mm512_min_round_pd(A, B, R) \ 1175341825Sdim (__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \ 1176341825Sdim (__v8df)(__m512d)(B), (int)(R)) 1177309124Sdim 1178341825Sdim#define _mm512_mask_min_round_pd(W, U, A, B, R) \ 1179341825Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1180341825Sdim (__v8df)_mm512_min_round_pd((A), (B), (R)), \ 1181341825Sdim (__v8df)(W)) 1182309124Sdim 1183341825Sdim#define _mm512_maskz_min_round_pd(U, A, B, R) \ 1184341825Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1185341825Sdim 
(__v8df)_mm512_min_round_pd((A), (B), (R)), \ 1186341825Sdim (__v8df)_mm512_setzero_pd()) 1187309124Sdim 1188341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 1189277325Sdim_mm512_min_pd(__m512d __A, __m512d __B) 1190277325Sdim{ 1191341825Sdim return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B, 1192341825Sdim _MM_FROUND_CUR_DIRECTION); 1193277325Sdim} 1194277325Sdim 1195341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 1196309124Sdim_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 1197309124Sdim{ 1198341825Sdim return (__m512d)__builtin_ia32_selectpd_512(__U, 1199341825Sdim (__v8df)_mm512_min_pd(__A, __B), 1200341825Sdim (__v8df)__W); 1201309124Sdim} 1202309124Sdim 1203341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 1204309124Sdim_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) 1205309124Sdim{ 1206341825Sdim return (__m512d)__builtin_ia32_selectpd_512(__U, 1207341825Sdim (__v8df)_mm512_min_pd(__A, __B), 1208341825Sdim (__v8df)_mm512_setzero_pd()); 1209309124Sdim} 1210309124Sdim 1211341825Sdim#define _mm512_min_round_ps(A, B, R) \ 1212341825Sdim (__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \ 1213341825Sdim (__v16sf)(__m512)(B), (int)(R)) 1214341825Sdim 1215341825Sdim#define _mm512_mask_min_round_ps(W, U, A, B, R) \ 1216341825Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 1217341825Sdim (__v16sf)_mm512_min_round_ps((A), (B), (R)), \ 1218341825Sdim (__v16sf)(W)) 1219341825Sdim 1220341825Sdim#define _mm512_maskz_min_round_ps(U, A, B, R) \ 1221341825Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 1222341825Sdim (__v16sf)_mm512_min_round_ps((A), (B), (R)), \ 1223341825Sdim (__v16sf)_mm512_setzero_ps()) 1224341825Sdim 1225341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 1226277325Sdim_mm512_min_ps(__m512 __A, __m512 __B) 1227277325Sdim{ 1228341825Sdim return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B, 1229341825Sdim 
_MM_FROUND_CUR_DIRECTION); 1230277325Sdim} 1231277325Sdim 1232341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 1233309124Sdim_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 1234309124Sdim{ 1235341825Sdim return (__m512)__builtin_ia32_selectps_512(__U, 1236341825Sdim (__v16sf)_mm512_min_ps(__A, __B), 1237341825Sdim (__v16sf)__W); 1238309124Sdim} 1239309124Sdim 1240341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 1241309124Sdim_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) 1242309124Sdim{ 1243341825Sdim return (__m512)__builtin_ia32_selectps_512(__U, 1244341825Sdim (__v16sf)_mm512_min_ps(__A, __B), 1245341825Sdim (__v16sf)_mm512_setzero_ps()); 1246309124Sdim} 1247309124Sdim 1248341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 1249296417Sdim_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 1250309124Sdim return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, 1251296417Sdim (__v4sf) __B, 1252296417Sdim (__v4sf) __W, 1253296417Sdim (__mmask8) __U, 1254296417Sdim _MM_FROUND_CUR_DIRECTION); 1255296417Sdim} 1256296417Sdim 1257341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 1258296417Sdim_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { 1259309124Sdim return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, 1260296417Sdim (__v4sf) __B, 1261296417Sdim (__v4sf) _mm_setzero_ps (), 1262296417Sdim (__mmask8) __U, 1263296417Sdim _MM_FROUND_CUR_DIRECTION); 1264296417Sdim} 1265296417Sdim 1266341825Sdim#define _mm_min_round_ss(A, B, R) \ 1267309124Sdim (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1268309124Sdim (__v4sf)(__m128)(B), \ 1269309124Sdim (__v4sf)_mm_setzero_ps(), \ 1270341825Sdim (__mmask8)-1, (int)(R)) 1271296417Sdim 1272341825Sdim#define _mm_mask_min_round_ss(W, U, A, B, R) \ 1273309124Sdim (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1274309124Sdim (__v4sf)(__m128)(B), \ 1275309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 
1276341825Sdim (int)(R)) 1277296417Sdim 1278341825Sdim#define _mm_maskz_min_round_ss(U, A, B, R) \ 1279309124Sdim (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1280309124Sdim (__v4sf)(__m128)(B), \ 1281309124Sdim (__v4sf)_mm_setzero_ps(), \ 1282341825Sdim (__mmask8)(U), (int)(R)) 1283296417Sdim 1284341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 1285296417Sdim_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 1286309124Sdim return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, 1287296417Sdim (__v2df) __B, 1288296417Sdim (__v2df) __W, 1289296417Sdim (__mmask8) __U, 1290296417Sdim _MM_FROUND_CUR_DIRECTION); 1291296417Sdim} 1292296417Sdim 1293341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 1294296417Sdim_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) { 1295309124Sdim return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, 1296296417Sdim (__v2df) __B, 1297296417Sdim (__v2df) _mm_setzero_pd (), 1298296417Sdim (__mmask8) __U, 1299296417Sdim _MM_FROUND_CUR_DIRECTION); 1300296417Sdim} 1301296417Sdim 1302341825Sdim#define _mm_min_round_sd(A, B, R) \ 1303309124Sdim (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1304309124Sdim (__v2df)(__m128d)(B), \ 1305309124Sdim (__v2df)_mm_setzero_pd(), \ 1306341825Sdim (__mmask8)-1, (int)(R)) 1307296417Sdim 1308341825Sdim#define _mm_mask_min_round_sd(W, U, A, B, R) \ 1309309124Sdim (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1310309124Sdim (__v2df)(__m128d)(B), \ 1311309124Sdim (__v2df)(__m128d)(W), \ 1312341825Sdim (__mmask8)(U), (int)(R)) 1313296417Sdim 1314341825Sdim#define _mm_maskz_min_round_sd(U, A, B, R) \ 1315309124Sdim (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1316309124Sdim (__v2df)(__m128d)(B), \ 1317309124Sdim (__v2df)_mm_setzero_pd(), \ 1318341825Sdim (__mmask8)(U), (int)(R)) 1319296417Sdim 1320277325Sdimstatic __inline __m512i 1321341825Sdim__DEFAULT_FN_ATTRS512 
1322277325Sdim_mm512_min_epi32(__m512i __A, __m512i __B) 1323277325Sdim{ 1324341825Sdim return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B); 1325277325Sdim} 1326277325Sdim 1327341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1328309124Sdim_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1329309124Sdim{ 1330341825Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1331341825Sdim (__v16si)_mm512_min_epi32(__A, __B), 1332341825Sdim (__v16si)__W); 1333309124Sdim} 1334309124Sdim 1335341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1336309124Sdim_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 1337309124Sdim{ 1338341825Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1339341825Sdim (__v16si)_mm512_min_epi32(__A, __B), 1340341825Sdim (__v16si)_mm512_setzero_si512()); 1341309124Sdim} 1342309124Sdim 1343341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 1344277325Sdim_mm512_min_epu32(__m512i __A, __m512i __B) 1345277325Sdim{ 1346341825Sdim return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B); 1347277325Sdim} 1348277325Sdim 1349341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1350309124Sdim_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1351309124Sdim{ 1352341825Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1353341825Sdim (__v16si)_mm512_min_epu32(__A, __B), 1354341825Sdim (__v16si)__W); 1355309124Sdim} 1356309124Sdim 1357341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1358309124Sdim_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) 1359309124Sdim{ 1360341825Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1361341825Sdim (__v16si)_mm512_min_epu32(__A, __B), 1362341825Sdim (__v16si)_mm512_setzero_si512()); 1363309124Sdim} 1364309124Sdim 1365341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 1366277325Sdim_mm512_min_epi64(__m512i __A, __m512i __B) 
1367277325Sdim{ 1368341825Sdim return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B); 1369277325Sdim} 1370277325Sdim 1371341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1372309124Sdim_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 1373309124Sdim{ 1374341825Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1375341825Sdim (__v8di)_mm512_min_epi64(__A, __B), 1376341825Sdim (__v8di)__W); 1377309124Sdim} 1378309124Sdim 1379341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1380309124Sdim_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) 1381309124Sdim{ 1382341825Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1383341825Sdim (__v8di)_mm512_min_epi64(__A, __B), 1384341825Sdim (__v8di)_mm512_setzero_si512()); 1385309124Sdim} 1386309124Sdim 1387341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 1388277325Sdim_mm512_min_epu64(__m512i __A, __m512i __B) 1389277325Sdim{ 1390341825Sdim return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B); 1391277325Sdim} 1392277325Sdim 1393341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1394309124Sdim_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 1395309124Sdim{ 1396341825Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1397341825Sdim (__v8di)_mm512_min_epu64(__A, __B), 1398341825Sdim (__v8di)__W); 1399309124Sdim} 1400309124Sdim 1401341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1402309124Sdim_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) 1403309124Sdim{ 1404341825Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1405341825Sdim (__v8di)_mm512_min_epu64(__A, __B), 1406341825Sdim (__v8di)_mm512_setzero_si512()); 1407309124Sdim} 1408309124Sdim 1409341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 1410277325Sdim_mm512_mul_epi32(__m512i __X, __m512i __Y) 1411277325Sdim{ 1412314564Sdim return 
(__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y); 1413277325Sdim} 1414277325Sdim 1415341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 1416314564Sdim_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 1417288943Sdim{ 1418314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1419314564Sdim (__v8di)_mm512_mul_epi32(__X, __Y), 1420314564Sdim (__v8di)__W); 1421288943Sdim} 1422288943Sdim 1423341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 1424314564Sdim_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y) 1425288943Sdim{ 1426314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1427314564Sdim (__v8di)_mm512_mul_epi32(__X, __Y), 1428314564Sdim (__v8di)_mm512_setzero_si512 ()); 1429288943Sdim} 1430288943Sdim 1431341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 1432277325Sdim_mm512_mul_epu32(__m512i __X, __m512i __Y) 1433277325Sdim{ 1434314564Sdim return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y); 1435277325Sdim} 1436277325Sdim 1437341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 1438314564Sdim_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 1439288943Sdim{ 1440314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1441314564Sdim (__v8di)_mm512_mul_epu32(__X, __Y), 1442314564Sdim (__v8di)__W); 1443288943Sdim} 1444288943Sdim 1445341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 1446314564Sdim_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y) 1447288943Sdim{ 1448314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1449314564Sdim (__v8di)_mm512_mul_epu32(__X, __Y), 1450314564Sdim (__v8di)_mm512_setzero_si512 ()); 1451288943Sdim} 1452288943Sdim 1453341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 1454288943Sdim_mm512_mullo_epi32 (__m512i __A, __m512i __B) 1455288943Sdim{ 1456309124Sdim return (__m512i) ((__v16su) __A * (__v16su) __B); 1457288943Sdim} 1458288943Sdim 
1459341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 1460314564Sdim_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) 1461288943Sdim{ 1462314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1463314564Sdim (__v16si)_mm512_mullo_epi32(__A, __B), 1464314564Sdim (__v16si)_mm512_setzero_si512()); 1465288943Sdim} 1466288943Sdim 1467341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 1468314564Sdim_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1469288943Sdim{ 1470314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1471314564Sdim (__v16si)_mm512_mullo_epi32(__A, __B), 1472314564Sdim (__v16si)__W); 1473288943Sdim} 1474288943Sdim 1475341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1476341825Sdim_mm512_mullox_epi64 (__m512i __A, __m512i __B) { 1477341825Sdim return (__m512i) ((__v8du) __A * (__v8du) __B); 1478341825Sdim} 1479309124Sdim 1480341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1481341825Sdim_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { 1482341825Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 1483341825Sdim (__v8di)_mm512_mullox_epi64(__A, __B), 1484341825Sdim (__v8di)__W); 1485341825Sdim} 1486309124Sdim 1487341825Sdim#define _mm512_sqrt_round_pd(A, R) \ 1488341825Sdim (__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)) 1489309124Sdim 1490341825Sdim#define _mm512_mask_sqrt_round_pd(W, U, A, R) \ 1491341825Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1492341825Sdim (__v8df)_mm512_sqrt_round_pd((A), (R)), \ 1493341825Sdim (__v8df)(__m512d)(W)) 1494341825Sdim 1495341825Sdim#define _mm512_maskz_sqrt_round_pd(U, A, R) \ 1496341825Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1497341825Sdim (__v8df)_mm512_sqrt_round_pd((A), (R)), \ 1498341825Sdim (__v8df)_mm512_setzero_pd()) 1499341825Sdim 1500341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 
1501341825Sdim_mm512_sqrt_pd(__m512d __A) 1502277325Sdim{ 1503341825Sdim return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A, 1504341825Sdim _MM_FROUND_CUR_DIRECTION); 1505277325Sdim} 1506277325Sdim 1507341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 1508309124Sdim_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) 1509309124Sdim{ 1510341825Sdim return (__m512d)__builtin_ia32_selectpd_512(__U, 1511341825Sdim (__v8df)_mm512_sqrt_pd(__A), 1512341825Sdim (__v8df)__W); 1513309124Sdim} 1514309124Sdim 1515341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 1516309124Sdim_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) 1517309124Sdim{ 1518341825Sdim return (__m512d)__builtin_ia32_selectpd_512(__U, 1519341825Sdim (__v8df)_mm512_sqrt_pd(__A), 1520341825Sdim (__v8df)_mm512_setzero_pd()); 1521309124Sdim} 1522309124Sdim 1523341825Sdim#define _mm512_sqrt_round_ps(A, R) \ 1524341825Sdim (__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)) 1525309124Sdim 1526341825Sdim#define _mm512_mask_sqrt_round_ps(W, U, A, R) \ 1527341825Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 1528341825Sdim (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ 1529341825Sdim (__v16sf)(__m512)(W)) 1530309124Sdim 1531341825Sdim#define _mm512_maskz_sqrt_round_ps(U, A, R) \ 1532341825Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 1533341825Sdim (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ 1534341825Sdim (__v16sf)_mm512_setzero_ps()) 1535309124Sdim 1536341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 1537341825Sdim_mm512_sqrt_ps(__m512 __A) 1538277325Sdim{ 1539341825Sdim return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A, 1540341825Sdim _MM_FROUND_CUR_DIRECTION); 1541277325Sdim} 1542277325Sdim 1543341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 1544309124Sdim_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) 1545309124Sdim{ 1546341825Sdim return (__m512)__builtin_ia32_selectps_512(__U, 1547341825Sdim (__v16sf)_mm512_sqrt_ps(__A), 
1548341825Sdim (__v16sf)__W); 1549309124Sdim} 1550309124Sdim 1551341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 1552309124Sdim_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A) 1553309124Sdim{ 1554341825Sdim return (__m512)__builtin_ia32_selectps_512(__U, 1555341825Sdim (__v16sf)_mm512_sqrt_ps(__A), 1556341825Sdim (__v16sf)_mm512_setzero_ps()); 1557309124Sdim} 1558309124Sdim 1559341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 1560277325Sdim_mm512_rsqrt14_pd(__m512d __A) 1561277325Sdim{ 1562277325Sdim return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1563277325Sdim (__v8df) 1564277325Sdim _mm512_setzero_pd (), 1565277325Sdim (__mmask8) -1);} 1566277325Sdim 1567341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 1568309124Sdim_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A) 1569309124Sdim{ 1570309124Sdim return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1571309124Sdim (__v8df) __W, 1572309124Sdim (__mmask8) __U); 1573309124Sdim} 1574309124Sdim 1575341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 1576309124Sdim_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A) 1577309124Sdim{ 1578309124Sdim return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1579309124Sdim (__v8df) 1580309124Sdim _mm512_setzero_pd (), 1581309124Sdim (__mmask8) __U); 1582309124Sdim} 1583309124Sdim 1584341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 1585277325Sdim_mm512_rsqrt14_ps(__m512 __A) 1586277325Sdim{ 1587277325Sdim return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1588277325Sdim (__v16sf) 1589277325Sdim _mm512_setzero_ps (), 1590277325Sdim (__mmask16) -1); 1591277325Sdim} 1592277325Sdim 1593341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 1594309124Sdim_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A) 1595309124Sdim{ 1596309124Sdim return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1597309124Sdim (__v16sf) __W, 1598309124Sdim (__mmask16) __U); 
1599309124Sdim} 1600309124Sdim 1601341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 1602309124Sdim_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A) 1603309124Sdim{ 1604309124Sdim return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1605309124Sdim (__v16sf) 1606309124Sdim _mm512_setzero_ps (), 1607309124Sdim (__mmask16) __U); 1608309124Sdim} 1609309124Sdim 1610341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 1611277325Sdim_mm_rsqrt14_ss(__m128 __A, __m128 __B) 1612277325Sdim{ 1613309124Sdim return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, 1614277325Sdim (__v4sf) __B, 1615277325Sdim (__v4sf) 1616277325Sdim _mm_setzero_ps (), 1617277325Sdim (__mmask8) -1); 1618277325Sdim} 1619277325Sdim 1620341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 1621309124Sdim_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1622309124Sdim{ 1623309124Sdim return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, 1624309124Sdim (__v4sf) __B, 1625309124Sdim (__v4sf) __W, 1626309124Sdim (__mmask8) __U); 1627309124Sdim} 1628309124Sdim 1629341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 1630309124Sdim_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) 1631309124Sdim{ 1632309124Sdim return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, 1633309124Sdim (__v4sf) __B, 1634309124Sdim (__v4sf) _mm_setzero_ps (), 1635309124Sdim (__mmask8) __U); 1636309124Sdim} 1637309124Sdim 1638341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 1639277325Sdim_mm_rsqrt14_sd(__m128d __A, __m128d __B) 1640277325Sdim{ 1641309124Sdim return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A, 1642277325Sdim (__v2df) __B, 1643277325Sdim (__v2df) 1644277325Sdim _mm_setzero_pd (), 1645277325Sdim (__mmask8) -1); 1646277325Sdim} 1647277325Sdim 1648341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 1649309124Sdim_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1650309124Sdim{ 1651309124Sdim 
return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, 1652309124Sdim (__v2df) __B, 1653309124Sdim (__v2df) __W, 1654309124Sdim (__mmask8) __U); 1655309124Sdim} 1656309124Sdim 1657341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 1658309124Sdim_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) 1659309124Sdim{ 1660309124Sdim return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, 1661309124Sdim (__v2df) __B, 1662309124Sdim (__v2df) _mm_setzero_pd (), 1663309124Sdim (__mmask8) __U); 1664309124Sdim} 1665309124Sdim 1666341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 1667277325Sdim_mm512_rcp14_pd(__m512d __A) 1668277325Sdim{ 1669277325Sdim return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1670277325Sdim (__v8df) 1671277325Sdim _mm512_setzero_pd (), 1672277325Sdim (__mmask8) -1); 1673277325Sdim} 1674277325Sdim 1675341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 1676309124Sdim_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A) 1677309124Sdim{ 1678309124Sdim return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1679309124Sdim (__v8df) __W, 1680309124Sdim (__mmask8) __U); 1681309124Sdim} 1682309124Sdim 1683341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 1684309124Sdim_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A) 1685309124Sdim{ 1686309124Sdim return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1687309124Sdim (__v8df) 1688309124Sdim _mm512_setzero_pd (), 1689309124Sdim (__mmask8) __U); 1690309124Sdim} 1691309124Sdim 1692341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 1693277325Sdim_mm512_rcp14_ps(__m512 __A) 1694277325Sdim{ 1695277325Sdim return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1696277325Sdim (__v16sf) 1697277325Sdim _mm512_setzero_ps (), 1698277325Sdim (__mmask16) -1); 1699277325Sdim} 1700309124Sdim 1701341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 1702309124Sdim_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A) 
1703309124Sdim{ 1704309124Sdim return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1705309124Sdim (__v16sf) __W, 1706309124Sdim (__mmask16) __U); 1707309124Sdim} 1708309124Sdim 1709341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 1710309124Sdim_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A) 1711309124Sdim{ 1712309124Sdim return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1713309124Sdim (__v16sf) 1714309124Sdim _mm512_setzero_ps (), 1715309124Sdim (__mmask16) __U); 1716309124Sdim} 1717309124Sdim 1718341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 1719277325Sdim_mm_rcp14_ss(__m128 __A, __m128 __B) 1720277325Sdim{ 1721309124Sdim return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, 1722277325Sdim (__v4sf) __B, 1723277325Sdim (__v4sf) 1724277325Sdim _mm_setzero_ps (), 1725277325Sdim (__mmask8) -1); 1726277325Sdim} 1727277325Sdim 1728341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 1729309124Sdim_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1730309124Sdim{ 1731309124Sdim return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, 1732309124Sdim (__v4sf) __B, 1733309124Sdim (__v4sf) __W, 1734309124Sdim (__mmask8) __U); 1735309124Sdim} 1736309124Sdim 1737341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 1738309124Sdim_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B) 1739309124Sdim{ 1740309124Sdim return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, 1741309124Sdim (__v4sf) __B, 1742309124Sdim (__v4sf) _mm_setzero_ps (), 1743309124Sdim (__mmask8) __U); 1744309124Sdim} 1745309124Sdim 1746341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 1747277325Sdim_mm_rcp14_sd(__m128d __A, __m128d __B) 1748277325Sdim{ 1749309124Sdim return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A, 1750277325Sdim (__v2df) __B, 1751277325Sdim (__v2df) 1752277325Sdim _mm_setzero_pd (), 1753277325Sdim (__mmask8) -1); 1754277325Sdim} 1755277325Sdim 1756341825Sdimstatic __inline__ __m128d 
__DEFAULT_FN_ATTRS128 1757309124Sdim_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1758309124Sdim{ 1759309124Sdim return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, 1760309124Sdim (__v2df) __B, 1761309124Sdim (__v2df) __W, 1762309124Sdim (__mmask8) __U); 1763309124Sdim} 1764309124Sdim 1765341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 1766309124Sdim_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B) 1767309124Sdim{ 1768309124Sdim return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, 1769309124Sdim (__v2df) __B, 1770309124Sdim (__v2df) _mm_setzero_pd (), 1771309124Sdim (__mmask8) __U); 1772309124Sdim} 1773309124Sdim 1774341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 1775277325Sdim_mm512_floor_ps(__m512 __A) 1776277325Sdim{ 1777277325Sdim return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 1778277325Sdim _MM_FROUND_FLOOR, 1779277325Sdim (__v16sf) __A, -1, 1780277325Sdim _MM_FROUND_CUR_DIRECTION); 1781277325Sdim} 1782277325Sdim 1783341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 1784309124Sdim_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A) 1785309124Sdim{ 1786309124Sdim return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 1787309124Sdim _MM_FROUND_FLOOR, 1788309124Sdim (__v16sf) __W, __U, 1789309124Sdim _MM_FROUND_CUR_DIRECTION); 1790309124Sdim} 1791309124Sdim 1792341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 1793277325Sdim_mm512_floor_pd(__m512d __A) 1794277325Sdim{ 1795277325Sdim return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 1796277325Sdim _MM_FROUND_FLOOR, 1797277325Sdim (__v8df) __A, -1, 1798277325Sdim _MM_FROUND_CUR_DIRECTION); 1799277325Sdim} 1800277325Sdim 1801341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 1802309124Sdim_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A) 1803309124Sdim{ 1804309124Sdim return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 1805309124Sdim _MM_FROUND_FLOOR, 
1806309124Sdim (__v8df) __W, __U, 1807309124Sdim _MM_FROUND_CUR_DIRECTION); 1808309124Sdim} 1809309124Sdim 1810341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 1811309124Sdim_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A) 1812309124Sdim{ 1813309124Sdim return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 1814309124Sdim _MM_FROUND_CEIL, 1815309124Sdim (__v16sf) __W, __U, 1816309124Sdim _MM_FROUND_CUR_DIRECTION); 1817309124Sdim} 1818309124Sdim 1819341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 1820277325Sdim_mm512_ceil_ps(__m512 __A) 1821277325Sdim{ 1822277325Sdim return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 1823277325Sdim _MM_FROUND_CEIL, 1824277325Sdim (__v16sf) __A, -1, 1825277325Sdim _MM_FROUND_CUR_DIRECTION); 1826277325Sdim} 1827277325Sdim 1828341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 1829277325Sdim_mm512_ceil_pd(__m512d __A) 1830277325Sdim{ 1831277325Sdim return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 1832277325Sdim _MM_FROUND_CEIL, 1833277325Sdim (__v8df) __A, -1, 1834277325Sdim _MM_FROUND_CUR_DIRECTION); 1835277325Sdim} 1836277325Sdim 1837341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 1838309124Sdim_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) 1839309124Sdim{ 1840309124Sdim return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 1841309124Sdim _MM_FROUND_CEIL, 1842309124Sdim (__v8df) __W, __U, 1843309124Sdim _MM_FROUND_CUR_DIRECTION); 1844309124Sdim} 1845309124Sdim 1846341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 1847277325Sdim_mm512_abs_epi64(__m512i __A) 1848277325Sdim{ 1849341825Sdim return (__m512i)__builtin_ia32_pabsq512((__v8di)__A); 1850277325Sdim} 1851277325Sdim 1852341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1853309124Sdim_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 1854309124Sdim{ 1855341825Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 1856341825Sdim 
(__v8di)_mm512_abs_epi64(__A), 1857341825Sdim (__v8di)__W); 1858309124Sdim} 1859309124Sdim 1860341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1861309124Sdim_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) 1862309124Sdim{ 1863341825Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 1864341825Sdim (__v8di)_mm512_abs_epi64(__A), 1865341825Sdim (__v8di)_mm512_setzero_si512()); 1866309124Sdim} 1867309124Sdim 1868341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 1869277325Sdim_mm512_abs_epi32(__m512i __A) 1870277325Sdim{ 1871341825Sdim return (__m512i)__builtin_ia32_pabsd512((__v16si) __A); 1872277325Sdim} 1873277325Sdim 1874341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1875309124Sdim_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 1876309124Sdim{ 1877341825Sdim return (__m512i)__builtin_ia32_selectd_512(__U, 1878341825Sdim (__v16si)_mm512_abs_epi32(__A), 1879341825Sdim (__v16si)__W); 1880309124Sdim} 1881309124Sdim 1882341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1883309124Sdim_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) 1884309124Sdim{ 1885341825Sdim return (__m512i)__builtin_ia32_selectd_512(__U, 1886341825Sdim (__v16si)_mm512_abs_epi32(__A), 1887341825Sdim (__v16si)_mm512_setzero_si512()); 1888309124Sdim} 1889309124Sdim 1890341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 1891296417Sdim_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 1892341825Sdim __A = _mm_add_ss(__A, __B); 1893341825Sdim return __builtin_ia32_selectss_128(__U, __A, __W); 1894296417Sdim} 1895296417Sdim 1896341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 1897296417Sdim_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) { 1898341825Sdim __A = _mm_add_ss(__A, __B); 1899341825Sdim return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); 1900296417Sdim} 1901296417Sdim 1902341825Sdim#define _mm_add_round_ss(A, B, R) \ 1903309124Sdim 
(__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 1904309124Sdim (__v4sf)(__m128)(B), \ 1905309124Sdim (__v4sf)_mm_setzero_ps(), \ 1906341825Sdim (__mmask8)-1, (int)(R)) 1907296417Sdim 1908341825Sdim#define _mm_mask_add_round_ss(W, U, A, B, R) \ 1909309124Sdim (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 1910309124Sdim (__v4sf)(__m128)(B), \ 1911309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 1912341825Sdim (int)(R)) 1913296417Sdim 1914341825Sdim#define _mm_maskz_add_round_ss(U, A, B, R) \ 1915309124Sdim (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 1916309124Sdim (__v4sf)(__m128)(B), \ 1917309124Sdim (__v4sf)_mm_setzero_ps(), \ 1918341825Sdim (__mmask8)(U), (int)(R)) 1919296417Sdim 1920341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 1921296417Sdim_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 1922341825Sdim __A = _mm_add_sd(__A, __B); 1923341825Sdim return __builtin_ia32_selectsd_128(__U, __A, __W); 1924296417Sdim} 1925296417Sdim 1926341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 1927296417Sdim_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) { 1928341825Sdim __A = _mm_add_sd(__A, __B); 1929341825Sdim return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); 1930296417Sdim} 1931341825Sdim#define _mm_add_round_sd(A, B, R) \ 1932309124Sdim (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 1933309124Sdim (__v2df)(__m128d)(B), \ 1934309124Sdim (__v2df)_mm_setzero_pd(), \ 1935341825Sdim (__mmask8)-1, (int)(R)) 1936296417Sdim 1937341825Sdim#define _mm_mask_add_round_sd(W, U, A, B, R) \ 1938309124Sdim (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 1939309124Sdim (__v2df)(__m128d)(B), \ 1940309124Sdim (__v2df)(__m128d)(W), \ 1941341825Sdim (__mmask8)(U), (int)(R)) 1942296417Sdim 1943341825Sdim#define _mm_maskz_add_round_sd(U, A, B, R) \ 1944309124Sdim (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 1945309124Sdim 
(__v2df)(__m128d)(B), \ 1946309124Sdim (__v2df)_mm_setzero_pd(), \ 1947341825Sdim (__mmask8)(U), (int)(R)) 1948296417Sdim 1949341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 1950296417Sdim_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 1951314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 1952314564Sdim (__v8df)_mm512_add_pd(__A, __B), 1953314564Sdim (__v8df)__W); 1954296417Sdim} 1955296417Sdim 1956341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 1957296417Sdim_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) { 1958314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 1959314564Sdim (__v8df)_mm512_add_pd(__A, __B), 1960314564Sdim (__v8df)_mm512_setzero_pd()); 1961296417Sdim} 1962296417Sdim 1963341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 1964296417Sdim_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 1965314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 1966314564Sdim (__v16sf)_mm512_add_ps(__A, __B), 1967314564Sdim (__v16sf)__W); 1968296417Sdim} 1969296417Sdim 1970341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 1971296417Sdim_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { 1972314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 1973314564Sdim (__v16sf)_mm512_add_ps(__A, __B), 1974314564Sdim (__v16sf)_mm512_setzero_ps()); 1975296417Sdim} 1976296417Sdim 1977341825Sdim#define _mm512_add_round_pd(A, B, R) \ 1978341825Sdim (__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \ 1979341825Sdim (__v8df)(__m512d)(B), (int)(R)) 1980296417Sdim 1981341825Sdim#define _mm512_mask_add_round_pd(W, U, A, B, R) \ 1982341825Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1983341825Sdim (__v8df)_mm512_add_round_pd((A), (B), (R)), \ 1984353358Sdim (__v8df)(__m512d)(W)) 1985296417Sdim 1986341825Sdim#define _mm512_maskz_add_round_pd(U, A, B, R) \ 1987341825Sdim 
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1988341825Sdim (__v8df)_mm512_add_round_pd((A), (B), (R)), \ 1989353358Sdim (__v8df)_mm512_setzero_pd()) 1990296417Sdim 1991341825Sdim#define _mm512_add_round_ps(A, B, R) \ 1992341825Sdim (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \ 1993341825Sdim (__v16sf)(__m512)(B), (int)(R)) 1994296417Sdim 1995341825Sdim#define _mm512_mask_add_round_ps(W, U, A, B, R) \ 1996341825Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 1997341825Sdim (__v16sf)_mm512_add_round_ps((A), (B), (R)), \ 1998353358Sdim (__v16sf)(__m512)(W)) 1999296417Sdim 2000341825Sdim#define _mm512_maskz_add_round_ps(U, A, B, R) \ 2001341825Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 2002341825Sdim (__v16sf)_mm512_add_round_ps((A), (B), (R)), \ 2003353358Sdim (__v16sf)_mm512_setzero_ps()) 2004296417Sdim 2005341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 2006296417Sdim_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 2007341825Sdim __A = _mm_sub_ss(__A, __B); 2008341825Sdim return __builtin_ia32_selectss_128(__U, __A, __W); 2009296417Sdim} 2010296417Sdim 2011341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 2012296417Sdim_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) { 2013341825Sdim __A = _mm_sub_ss(__A, __B); 2014341825Sdim return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); 2015296417Sdim} 2016341825Sdim#define _mm_sub_round_ss(A, B, R) \ 2017309124Sdim (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2018309124Sdim (__v4sf)(__m128)(B), \ 2019309124Sdim (__v4sf)_mm_setzero_ps(), \ 2020341825Sdim (__mmask8)-1, (int)(R)) 2021296417Sdim 2022341825Sdim#define _mm_mask_sub_round_ss(W, U, A, B, R) \ 2023309124Sdim (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2024309124Sdim (__v4sf)(__m128)(B), \ 2025309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 2026341825Sdim (int)(R)) 2027296417Sdim 2028341825Sdim#define _mm_maskz_sub_round_ss(U, A, B, 
R) \ 2029309124Sdim (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2030309124Sdim (__v4sf)(__m128)(B), \ 2031309124Sdim (__v4sf)_mm_setzero_ps(), \ 2032341825Sdim (__mmask8)(U), (int)(R)) 2033296417Sdim 2034341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 2035296417Sdim_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 2036341825Sdim __A = _mm_sub_sd(__A, __B); 2037341825Sdim return __builtin_ia32_selectsd_128(__U, __A, __W); 2038296417Sdim} 2039296417Sdim 2040341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 2041296417Sdim_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) { 2042341825Sdim __A = _mm_sub_sd(__A, __B); 2043341825Sdim return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); 2044296417Sdim} 2045296417Sdim 2046341825Sdim#define _mm_sub_round_sd(A, B, R) \ 2047309124Sdim (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2048309124Sdim (__v2df)(__m128d)(B), \ 2049309124Sdim (__v2df)_mm_setzero_pd(), \ 2050341825Sdim (__mmask8)-1, (int)(R)) 2051296417Sdim 2052341825Sdim#define _mm_mask_sub_round_sd(W, U, A, B, R) \ 2053309124Sdim (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2054309124Sdim (__v2df)(__m128d)(B), \ 2055309124Sdim (__v2df)(__m128d)(W), \ 2056341825Sdim (__mmask8)(U), (int)(R)) 2057296417Sdim 2058341825Sdim#define _mm_maskz_sub_round_sd(U, A, B, R) \ 2059309124Sdim (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2060309124Sdim (__v2df)(__m128d)(B), \ 2061309124Sdim (__v2df)_mm_setzero_pd(), \ 2062341825Sdim (__mmask8)(U), (int)(R)) 2063296417Sdim 2064341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2065296417Sdim_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2066314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2067314564Sdim (__v8df)_mm512_sub_pd(__A, __B), 2068314564Sdim (__v8df)__W); 2069296417Sdim} 2070296417Sdim 2071341825Sdimstatic __inline__ __m512d 
__DEFAULT_FN_ATTRS512 2072296417Sdim_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2073314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2074314564Sdim (__v8df)_mm512_sub_pd(__A, __B), 2075314564Sdim (__v8df)_mm512_setzero_pd()); 2076296417Sdim} 2077296417Sdim 2078341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2079296417Sdim_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2080314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2081314564Sdim (__v16sf)_mm512_sub_ps(__A, __B), 2082314564Sdim (__v16sf)__W); 2083296417Sdim} 2084296417Sdim 2085341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2086296417Sdim_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2087314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2088314564Sdim (__v16sf)_mm512_sub_ps(__A, __B), 2089314564Sdim (__v16sf)_mm512_setzero_ps()); 2090296417Sdim} 2091296417Sdim 2092341825Sdim#define _mm512_sub_round_pd(A, B, R) \ 2093341825Sdim (__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \ 2094341825Sdim (__v8df)(__m512d)(B), (int)(R)) 2095296417Sdim 2096341825Sdim#define _mm512_mask_sub_round_pd(W, U, A, B, R) \ 2097341825Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 2098341825Sdim (__v8df)_mm512_sub_round_pd((A), (B), (R)), \ 2099353358Sdim (__v8df)(__m512d)(W)) 2100296417Sdim 2101341825Sdim#define _mm512_maskz_sub_round_pd(U, A, B, R) \ 2102341825Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 2103341825Sdim (__v8df)_mm512_sub_round_pd((A), (B), (R)), \ 2104353358Sdim (__v8df)_mm512_setzero_pd()) 2105296417Sdim 2106341825Sdim#define _mm512_sub_round_ps(A, B, R) \ 2107341825Sdim (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \ 2108341825Sdim (__v16sf)(__m512)(B), (int)(R)) 2109296417Sdim 2110341825Sdim#define _mm512_mask_sub_round_ps(W, U, A, B, R) \ 2111341825Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 2112341825Sdim 
(__v16sf)_mm512_sub_round_ps((A), (B), (R)), \ 2113353358Sdim (__v16sf)(__m512)(W)) 2114296417Sdim 2115341825Sdim#define _mm512_maskz_sub_round_ps(U, A, B, R) \ 2116341825Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 2117341825Sdim (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \ 2118353358Sdim (__v16sf)_mm512_setzero_ps()) 2119296417Sdim 2120341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 2121296417Sdim_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 2122341825Sdim __A = _mm_mul_ss(__A, __B); 2123341825Sdim return __builtin_ia32_selectss_128(__U, __A, __W); 2124296417Sdim} 2125296417Sdim 2126341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 2127296417Sdim_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) { 2128341825Sdim __A = _mm_mul_ss(__A, __B); 2129341825Sdim return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); 2130296417Sdim} 2131341825Sdim#define _mm_mul_round_ss(A, B, R) \ 2132309124Sdim (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2133309124Sdim (__v4sf)(__m128)(B), \ 2134309124Sdim (__v4sf)_mm_setzero_ps(), \ 2135341825Sdim (__mmask8)-1, (int)(R)) 2136296417Sdim 2137341825Sdim#define _mm_mask_mul_round_ss(W, U, A, B, R) \ 2138309124Sdim (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2139309124Sdim (__v4sf)(__m128)(B), \ 2140309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 2141341825Sdim (int)(R)) 2142296417Sdim 2143341825Sdim#define _mm_maskz_mul_round_ss(U, A, B, R) \ 2144309124Sdim (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2145309124Sdim (__v4sf)(__m128)(B), \ 2146309124Sdim (__v4sf)_mm_setzero_ps(), \ 2147341825Sdim (__mmask8)(U), (int)(R)) 2148296417Sdim 2149341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 2150296417Sdim_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 2151341825Sdim __A = _mm_mul_sd(__A, __B); 2152341825Sdim return __builtin_ia32_selectsd_128(__U, __A, __W); 2153296417Sdim} 2154296417Sdim 
2155341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 2156296417Sdim_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) { 2157341825Sdim __A = _mm_mul_sd(__A, __B); 2158341825Sdim return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); 2159296417Sdim} 2160296417Sdim 2161341825Sdim#define _mm_mul_round_sd(A, B, R) \ 2162309124Sdim (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2163309124Sdim (__v2df)(__m128d)(B), \ 2164309124Sdim (__v2df)_mm_setzero_pd(), \ 2165341825Sdim (__mmask8)-1, (int)(R)) 2166296417Sdim 2167341825Sdim#define _mm_mask_mul_round_sd(W, U, A, B, R) \ 2168309124Sdim (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2169309124Sdim (__v2df)(__m128d)(B), \ 2170309124Sdim (__v2df)(__m128d)(W), \ 2171341825Sdim (__mmask8)(U), (int)(R)) 2172296417Sdim 2173341825Sdim#define _mm_maskz_mul_round_sd(U, A, B, R) \ 2174309124Sdim (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2175309124Sdim (__v2df)(__m128d)(B), \ 2176309124Sdim (__v2df)_mm_setzero_pd(), \ 2177341825Sdim (__mmask8)(U), (int)(R)) 2178296417Sdim 2179341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2180296417Sdim_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2181314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2182314564Sdim (__v8df)_mm512_mul_pd(__A, __B), 2183314564Sdim (__v8df)__W); 2184296417Sdim} 2185296417Sdim 2186341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2187296417Sdim_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2188314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2189314564Sdim (__v8df)_mm512_mul_pd(__A, __B), 2190314564Sdim (__v8df)_mm512_setzero_pd()); 2191296417Sdim} 2192296417Sdim 2193341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2194296417Sdim_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2195314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 
2196314564Sdim (__v16sf)_mm512_mul_ps(__A, __B), 2197314564Sdim (__v16sf)__W); 2198296417Sdim} 2199296417Sdim 2200341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2201296417Sdim_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2202314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2203314564Sdim (__v16sf)_mm512_mul_ps(__A, __B), 2204314564Sdim (__v16sf)_mm512_setzero_ps()); 2205296417Sdim} 2206296417Sdim 2207341825Sdim#define _mm512_mul_round_pd(A, B, R) \ 2208341825Sdim (__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \ 2209341825Sdim (__v8df)(__m512d)(B), (int)(R)) 2210296417Sdim 2211341825Sdim#define _mm512_mask_mul_round_pd(W, U, A, B, R) \ 2212341825Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 2213341825Sdim (__v8df)_mm512_mul_round_pd((A), (B), (R)), \ 2214353358Sdim (__v8df)(__m512d)(W)) 2215296417Sdim 2216341825Sdim#define _mm512_maskz_mul_round_pd(U, A, B, R) \ 2217341825Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 2218341825Sdim (__v8df)_mm512_mul_round_pd((A), (B), (R)), \ 2219353358Sdim (__v8df)_mm512_setzero_pd()) 2220296417Sdim 2221341825Sdim#define _mm512_mul_round_ps(A, B, R) \ 2222341825Sdim (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \ 2223341825Sdim (__v16sf)(__m512)(B), (int)(R)) 2224296417Sdim 2225341825Sdim#define _mm512_mask_mul_round_ps(W, U, A, B, R) \ 2226341825Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 2227341825Sdim (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ 2228353358Sdim (__v16sf)(__m512)(W)) 2229296417Sdim 2230341825Sdim#define _mm512_maskz_mul_round_ps(U, A, B, R) \ 2231341825Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 2232341825Sdim (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ 2233353358Sdim (__v16sf)_mm512_setzero_ps()) 2234296417Sdim 2235341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 2236296417Sdim_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 2237341825Sdim __A = _mm_div_ss(__A, 
__B); 2238341825Sdim return __builtin_ia32_selectss_128(__U, __A, __W); 2239296417Sdim} 2240296417Sdim 2241341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 2242296417Sdim_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { 2243341825Sdim __A = _mm_div_ss(__A, __B); 2244341825Sdim return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); 2245296417Sdim} 2246296417Sdim 2247341825Sdim#define _mm_div_round_ss(A, B, R) \ 2248309124Sdim (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2249309124Sdim (__v4sf)(__m128)(B), \ 2250309124Sdim (__v4sf)_mm_setzero_ps(), \ 2251341825Sdim (__mmask8)-1, (int)(R)) 2252296417Sdim 2253341825Sdim#define _mm_mask_div_round_ss(W, U, A, B, R) \ 2254309124Sdim (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2255309124Sdim (__v4sf)(__m128)(B), \ 2256309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 2257341825Sdim (int)(R)) 2258296417Sdim 2259341825Sdim#define _mm_maskz_div_round_ss(U, A, B, R) \ 2260309124Sdim (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2261309124Sdim (__v4sf)(__m128)(B), \ 2262309124Sdim (__v4sf)_mm_setzero_ps(), \ 2263341825Sdim (__mmask8)(U), (int)(R)) 2264296417Sdim 2265341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 2266296417Sdim_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 2267341825Sdim __A = _mm_div_sd(__A, __B); 2268341825Sdim return __builtin_ia32_selectsd_128(__U, __A, __W); 2269296417Sdim} 2270296417Sdim 2271341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 2272296417Sdim_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { 2273341825Sdim __A = _mm_div_sd(__A, __B); 2274341825Sdim return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); 2275296417Sdim} 2276296417Sdim 2277341825Sdim#define _mm_div_round_sd(A, B, R) \ 2278309124Sdim (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2279309124Sdim (__v2df)(__m128d)(B), \ 2280309124Sdim (__v2df)_mm_setzero_pd(), \ 2281341825Sdim 
(__mmask8)-1, (int)(R)) 2282296417Sdim 2283341825Sdim#define _mm_mask_div_round_sd(W, U, A, B, R) \ 2284309124Sdim (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2285309124Sdim (__v2df)(__m128d)(B), \ 2286309124Sdim (__v2df)(__m128d)(W), \ 2287341825Sdim (__mmask8)(U), (int)(R)) 2288296417Sdim 2289341825Sdim#define _mm_maskz_div_round_sd(U, A, B, R) \ 2290309124Sdim (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2291309124Sdim (__v2df)(__m128d)(B), \ 2292309124Sdim (__v2df)_mm_setzero_pd(), \ 2293341825Sdim (__mmask8)(U), (int)(R)) 2294296417Sdim 2295341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 2296309124Sdim_mm512_div_pd(__m512d __a, __m512d __b) 2297309124Sdim{ 2298309124Sdim return (__m512d)((__v8df)__a/(__v8df)__b); 2299309124Sdim} 2300309124Sdim 2301341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2302296417Sdim_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2303314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2304314564Sdim (__v8df)_mm512_div_pd(__A, __B), 2305314564Sdim (__v8df)__W); 2306296417Sdim} 2307296417Sdim 2308341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2309296417Sdim_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2310314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2311314564Sdim (__v8df)_mm512_div_pd(__A, __B), 2312314564Sdim (__v8df)_mm512_setzero_pd()); 2313296417Sdim} 2314296417Sdim 2315341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 2316309124Sdim_mm512_div_ps(__m512 __a, __m512 __b) 2317309124Sdim{ 2318309124Sdim return (__m512)((__v16sf)__a/(__v16sf)__b); 2319309124Sdim} 2320309124Sdim 2321341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2322296417Sdim_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2323314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2324314564Sdim (__v16sf)_mm512_div_ps(__A, __B), 2325314564Sdim (__v16sf)__W); 
2326296417Sdim} 2327296417Sdim 2328341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2329296417Sdim_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2330314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2331314564Sdim (__v16sf)_mm512_div_ps(__A, __B), 2332314564Sdim (__v16sf)_mm512_setzero_ps()); 2333296417Sdim} 2334296417Sdim 2335341825Sdim#define _mm512_div_round_pd(A, B, R) \ 2336341825Sdim (__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \ 2337341825Sdim (__v8df)(__m512d)(B), (int)(R)) 2338296417Sdim 2339341825Sdim#define _mm512_mask_div_round_pd(W, U, A, B, R) \ 2340341825Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 2341341825Sdim (__v8df)_mm512_div_round_pd((A), (B), (R)), \ 2342353358Sdim (__v8df)(__m512d)(W)) 2343296417Sdim 2344341825Sdim#define _mm512_maskz_div_round_pd(U, A, B, R) \ 2345341825Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 2346341825Sdim (__v8df)_mm512_div_round_pd((A), (B), (R)), \ 2347353358Sdim (__v8df)_mm512_setzero_pd()) 2348296417Sdim 2349341825Sdim#define _mm512_div_round_ps(A, B, R) \ 2350341825Sdim (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \ 2351341825Sdim (__v16sf)(__m512)(B), (int)(R)) 2352296417Sdim 2353341825Sdim#define _mm512_mask_div_round_ps(W, U, A, B, R) \ 2354341825Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 2355341825Sdim (__v16sf)_mm512_div_round_ps((A), (B), (R)), \ 2356353358Sdim (__v16sf)(__m512)(W)) 2357296417Sdim 2358341825Sdim#define _mm512_maskz_div_round_ps(U, A, B, R) \ 2359341825Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 2360341825Sdim (__v16sf)_mm512_div_round_ps((A), (B), (R)), \ 2361353358Sdim (__v16sf)_mm512_setzero_ps()) 2362296417Sdim 2363341825Sdim#define _mm512_roundscale_ps(A, B) \ 2364309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \ 2365341825Sdim (__v16sf)_mm512_undefined_ps(), \ 2366341825Sdim (__mmask16)-1, \ 2367341825Sdim _MM_FROUND_CUR_DIRECTION) 
2368288943Sdim 2369341825Sdim#define _mm512_mask_roundscale_ps(A, B, C, imm) \ 2370309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ 2371309124Sdim (__v16sf)(__m512)(A), (__mmask16)(B), \ 2372341825Sdim _MM_FROUND_CUR_DIRECTION) 2373309124Sdim 2374341825Sdim#define _mm512_maskz_roundscale_ps(A, B, imm) \ 2375309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ 2376309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2377309124Sdim (__mmask16)(A), \ 2378341825Sdim _MM_FROUND_CUR_DIRECTION) 2379309124Sdim 2380341825Sdim#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \ 2381309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ 2382309124Sdim (__v16sf)(__m512)(A), (__mmask16)(B), \ 2383341825Sdim (int)(R)) 2384309124Sdim 2385341825Sdim#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \ 2386309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ 2387309124Sdim (__v16sf)_mm512_setzero_ps(), \ 2388341825Sdim (__mmask16)(A), (int)(R)) 2389309124Sdim 2390341825Sdim#define _mm512_roundscale_round_ps(A, imm, R) \ 2391309124Sdim (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \ 2392309124Sdim (__v16sf)_mm512_undefined_ps(), \ 2393341825Sdim (__mmask16)-1, (int)(R)) 2394309124Sdim 2395341825Sdim#define _mm512_roundscale_pd(A, B) \ 2396309124Sdim (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \ 2397341825Sdim (__v8df)_mm512_undefined_pd(), \ 2398341825Sdim (__mmask8)-1, \ 2399341825Sdim _MM_FROUND_CUR_DIRECTION) 2400288943Sdim 2401341825Sdim#define _mm512_mask_roundscale_pd(A, B, C, imm) \ 2402309124Sdim (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ 2403309124Sdim (__v8df)(__m512d)(A), (__mmask8)(B), \ 2404341825Sdim _MM_FROUND_CUR_DIRECTION) 2405309124Sdim 2406341825Sdim#define _mm512_maskz_roundscale_pd(A, B, imm) \ 2407309124Sdim 
(__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ 2408309124Sdim (__v8df)_mm512_setzero_pd(), \ 2409309124Sdim (__mmask8)(A), \ 2410341825Sdim _MM_FROUND_CUR_DIRECTION) 2411309124Sdim 2412341825Sdim#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \ 2413309124Sdim (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ 2414309124Sdim (__v8df)(__m512d)(A), (__mmask8)(B), \ 2415341825Sdim (int)(R)) 2416309124Sdim 2417341825Sdim#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \ 2418309124Sdim (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ 2419309124Sdim (__v8df)_mm512_setzero_pd(), \ 2420341825Sdim (__mmask8)(A), (int)(R)) 2421309124Sdim 2422341825Sdim#define _mm512_roundscale_round_pd(A, imm, R) \ 2423309124Sdim (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \ 2424309124Sdim (__v8df)_mm512_undefined_pd(), \ 2425341825Sdim (__mmask8)-1, (int)(R)) 2426309124Sdim 2427341825Sdim#define _mm512_fmadd_round_pd(A, B, C, R) \ 2428309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2429309124Sdim (__v8df)(__m512d)(B), \ 2430341825Sdim (__v8df)(__m512d)(C), \ 2431341825Sdim (__mmask8)-1, (int)(R)) 2432288943Sdim 2433288943Sdim 2434341825Sdim#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \ 2435309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2436309124Sdim (__v8df)(__m512d)(B), \ 2437309124Sdim (__v8df)(__m512d)(C), \ 2438341825Sdim (__mmask8)(U), (int)(R)) 2439288943Sdim 2440288943Sdim 2441341825Sdim#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \ 2442309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \ 2443309124Sdim (__v8df)(__m512d)(B), \ 2444309124Sdim (__v8df)(__m512d)(C), \ 2445341825Sdim (__mmask8)(U), (int)(R)) 2446288943Sdim 2447288943Sdim 2448341825Sdim#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \ 2449309124Sdim 
(__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ 2450309124Sdim (__v8df)(__m512d)(B), \ 2451309124Sdim (__v8df)(__m512d)(C), \ 2452341825Sdim (__mmask8)(U), (int)(R)) 2453288943Sdim 2454288943Sdim 2455341825Sdim#define _mm512_fmsub_round_pd(A, B, C, R) \ 2456309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2457309124Sdim (__v8df)(__m512d)(B), \ 2458309124Sdim -(__v8df)(__m512d)(C), \ 2459341825Sdim (__mmask8)-1, (int)(R)) 2460288943Sdim 2461288943Sdim 2462341825Sdim#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \ 2463309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2464309124Sdim (__v8df)(__m512d)(B), \ 2465309124Sdim -(__v8df)(__m512d)(C), \ 2466341825Sdim (__mmask8)(U), (int)(R)) 2467288943Sdim 2468288943Sdim 2469341825Sdim#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \ 2470309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ 2471309124Sdim (__v8df)(__m512d)(B), \ 2472309124Sdim -(__v8df)(__m512d)(C), \ 2473341825Sdim (__mmask8)(U), (int)(R)) 2474288943Sdim 2475288943Sdim 2476341825Sdim#define _mm512_fnmadd_round_pd(A, B, C, R) \ 2477309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ 2478309124Sdim (__v8df)(__m512d)(B), \ 2479341825Sdim (__v8df)(__m512d)(C), \ 2480341825Sdim (__mmask8)-1, (int)(R)) 2481288943Sdim 2482288943Sdim 2483341825Sdim#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \ 2484309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \ 2485309124Sdim (__v8df)(__m512d)(B), \ 2486309124Sdim (__v8df)(__m512d)(C), \ 2487341825Sdim (__mmask8)(U), (int)(R)) 2488288943Sdim 2489288943Sdim 2490341825Sdim#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \ 2491309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ 2492309124Sdim (__v8df)(__m512d)(B), \ 2493309124Sdim (__v8df)(__m512d)(C), \ 2494341825Sdim (__mmask8)(U), (int)(R)) 2495288943Sdim 2496288943Sdim 2497341825Sdim#define 
_mm512_fnmsub_round_pd(A, B, C, R) \ 2498309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ 2499309124Sdim (__v8df)(__m512d)(B), \ 2500309124Sdim -(__v8df)(__m512d)(C), \ 2501341825Sdim (__mmask8)-1, (int)(R)) 2502288943Sdim 2503288943Sdim 2504341825Sdim#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \ 2505309124Sdim (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ 2506309124Sdim (__v8df)(__m512d)(B), \ 2507309124Sdim -(__v8df)(__m512d)(C), \ 2508341825Sdim (__mmask8)(U), (int)(R)) 2509288943Sdim 2510288943Sdim 2511341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2512288943Sdim_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) 2513277325Sdim{ 2514288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2515288943Sdim (__v8df) __B, 2516288943Sdim (__v8df) __C, 2517288943Sdim (__mmask8) -1, 2518288943Sdim _MM_FROUND_CUR_DIRECTION); 2519277325Sdim} 2520288943Sdim 2521341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2522288943Sdim_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 2523277325Sdim{ 2524288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2525288943Sdim (__v8df) __B, 2526288943Sdim (__v8df) __C, 2527288943Sdim (__mmask8) __U, 2528288943Sdim _MM_FROUND_CUR_DIRECTION); 2529277325Sdim} 2530277325Sdim 2531341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2532288943Sdim_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 2533277325Sdim{ 2534288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, 2535288943Sdim (__v8df) __B, 2536288943Sdim (__v8df) __C, 2537288943Sdim (__mmask8) __U, 2538288943Sdim _MM_FROUND_CUR_DIRECTION); 2539277325Sdim} 2540277325Sdim 2541341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2542288943Sdim_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2543288943Sdim{ 2544288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_maskz 
((__v8df) __A, 2545288943Sdim (__v8df) __B, 2546288943Sdim (__v8df) __C, 2547288943Sdim (__mmask8) __U, 2548288943Sdim _MM_FROUND_CUR_DIRECTION); 2549288943Sdim} 2550288943Sdim 2551341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2552277325Sdim_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) 2553277325Sdim{ 2554288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2555288943Sdim (__v8df) __B, 2556288943Sdim -(__v8df) __C, 2557288943Sdim (__mmask8) -1, 2558288943Sdim _MM_FROUND_CUR_DIRECTION); 2559277325Sdim} 2560277325Sdim 2561341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2562288943Sdim_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 2563288943Sdim{ 2564288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2565288943Sdim (__v8df) __B, 2566288943Sdim -(__v8df) __C, 2567288943Sdim (__mmask8) __U, 2568288943Sdim _MM_FROUND_CUR_DIRECTION); 2569288943Sdim} 2570288943Sdim 2571341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2572288943Sdim_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2573288943Sdim{ 2574288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 2575288943Sdim (__v8df) __B, 2576288943Sdim -(__v8df) __C, 2577288943Sdim (__mmask8) __U, 2578288943Sdim _MM_FROUND_CUR_DIRECTION); 2579288943Sdim} 2580288943Sdim 2581341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2582277325Sdim_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) 2583277325Sdim{ 2584341825Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2585341825Sdim -(__v8df) __B, 2586288943Sdim (__v8df) __C, 2587288943Sdim (__mmask8) -1, 2588288943Sdim _MM_FROUND_CUR_DIRECTION); 2589277325Sdim} 2590277325Sdim 2591341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2592288943Sdim_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 2593288943Sdim{ 2594288943Sdim return (__m512d) 
__builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, 2595288943Sdim (__v8df) __B, 2596288943Sdim (__v8df) __C, 2597288943Sdim (__mmask8) __U, 2598288943Sdim _MM_FROUND_CUR_DIRECTION); 2599288943Sdim} 2600288943Sdim 2601341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2602288943Sdim_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2603288943Sdim{ 2604288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 2605288943Sdim (__v8df) __B, 2606288943Sdim (__v8df) __C, 2607288943Sdim (__mmask8) __U, 2608288943Sdim _MM_FROUND_CUR_DIRECTION); 2609288943Sdim} 2610288943Sdim 2611341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2612288943Sdim_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) 2613288943Sdim{ 2614341825Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2615341825Sdim -(__v8df) __B, 2616288943Sdim -(__v8df) __C, 2617288943Sdim (__mmask8) -1, 2618288943Sdim _MM_FROUND_CUR_DIRECTION); 2619288943Sdim} 2620288943Sdim 2621341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2622288943Sdim_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2623288943Sdim{ 2624288943Sdim return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 2625288943Sdim (__v8df) __B, 2626288943Sdim -(__v8df) __C, 2627288943Sdim (__mmask8) __U, 2628288943Sdim _MM_FROUND_CUR_DIRECTION); 2629288943Sdim} 2630288943Sdim 2631341825Sdim#define _mm512_fmadd_round_ps(A, B, C, R) \ 2632309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2633309124Sdim (__v16sf)(__m512)(B), \ 2634341825Sdim (__v16sf)(__m512)(C), \ 2635341825Sdim (__mmask16)-1, (int)(R)) 2636288943Sdim 2637288943Sdim 2638341825Sdim#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \ 2639309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2640309124Sdim (__v16sf)(__m512)(B), \ 2641309124Sdim (__v16sf)(__m512)(C), \ 2642341825Sdim (__mmask16)(U), (int)(R)) 2643288943Sdim 
2644288943Sdim 2645341825Sdim#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \ 2646309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \ 2647309124Sdim (__v16sf)(__m512)(B), \ 2648309124Sdim (__v16sf)(__m512)(C), \ 2649341825Sdim (__mmask16)(U), (int)(R)) 2650288943Sdim 2651288943Sdim 2652341825Sdim#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \ 2653309124Sdim (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ 2654309124Sdim (__v16sf)(__m512)(B), \ 2655309124Sdim (__v16sf)(__m512)(C), \ 2656341825Sdim (__mmask16)(U), (int)(R)) 2657288943Sdim 2658288943Sdim 2659341825Sdim#define _mm512_fmsub_round_ps(A, B, C, R) \ 2660309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2661309124Sdim (__v16sf)(__m512)(B), \ 2662309124Sdim -(__v16sf)(__m512)(C), \ 2663341825Sdim (__mmask16)-1, (int)(R)) 2664288943Sdim 2665288943Sdim 2666341825Sdim#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \ 2667309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2668309124Sdim (__v16sf)(__m512)(B), \ 2669309124Sdim -(__v16sf)(__m512)(C), \ 2670341825Sdim (__mmask16)(U), (int)(R)) 2671288943Sdim 2672288943Sdim 2673341825Sdim#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \ 2674309124Sdim (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ 2675309124Sdim (__v16sf)(__m512)(B), \ 2676309124Sdim -(__v16sf)(__m512)(C), \ 2677341825Sdim (__mmask16)(U), (int)(R)) 2678288943Sdim 2679288943Sdim 2680341825Sdim#define _mm512_fnmadd_round_ps(A, B, C, R) \ 2681341825Sdim (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2682341825Sdim -(__v16sf)(__m512)(B), \ 2683341825Sdim (__v16sf)(__m512)(C), \ 2684341825Sdim (__mmask16)-1, (int)(R)) 2685288943Sdim 2686288943Sdim 2687341825Sdim#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \ 2688309124Sdim (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \ 2689309124Sdim (__v16sf)(__m512)(B), \ 2690309124Sdim (__v16sf)(__m512)(C), \ 
2691341825Sdim (__mmask16)(U), (int)(R)) 2692288943Sdim 2693288943Sdim 2694341825Sdim#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \ 2695309124Sdim (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ 2696309124Sdim (__v16sf)(__m512)(B), \ 2697309124Sdim (__v16sf)(__m512)(C), \ 2698341825Sdim (__mmask16)(U), (int)(R)) 2699288943Sdim 2700288943Sdim 2701341825Sdim#define _mm512_fnmsub_round_ps(A, B, C, R) \ 2702341825Sdim (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2703341825Sdim -(__v16sf)(__m512)(B), \ 2704309124Sdim -(__v16sf)(__m512)(C), \ 2705341825Sdim (__mmask16)-1, (int)(R)) 2706288943Sdim 2707288943Sdim 2708341825Sdim#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \ 2709309124Sdim (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ 2710309124Sdim (__v16sf)(__m512)(B), \ 2711309124Sdim -(__v16sf)(__m512)(C), \ 2712341825Sdim (__mmask16)(U), (int)(R)) 2713288943Sdim 2714288943Sdim 2715341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2716277325Sdim_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) 2717277325Sdim{ 2718288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2719288943Sdim (__v16sf) __B, 2720288943Sdim (__v16sf) __C, 2721288943Sdim (__mmask16) -1, 2722288943Sdim _MM_FROUND_CUR_DIRECTION); 2723277325Sdim} 2724277325Sdim 2725341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2726288943Sdim_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 2727288943Sdim{ 2728288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2729288943Sdim (__v16sf) __B, 2730288943Sdim (__v16sf) __C, 2731288943Sdim (__mmask16) __U, 2732288943Sdim _MM_FROUND_CUR_DIRECTION); 2733288943Sdim} 2734288943Sdim 2735341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2736288943Sdim_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 2737288943Sdim{ 2738288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, 
2739288943Sdim (__v16sf) __B, 2740288943Sdim (__v16sf) __C, 2741288943Sdim (__mmask16) __U, 2742288943Sdim _MM_FROUND_CUR_DIRECTION); 2743288943Sdim} 2744288943Sdim 2745341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2746288943Sdim_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2747288943Sdim{ 2748288943Sdim return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 2749288943Sdim (__v16sf) __B, 2750288943Sdim (__v16sf) __C, 2751288943Sdim (__mmask16) __U, 2752288943Sdim _MM_FROUND_CUR_DIRECTION); 2753288943Sdim} 2754288943Sdim 2755341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2756277325Sdim_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) 2757277325Sdim{ 2758288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2759288943Sdim (__v16sf) __B, 2760288943Sdim -(__v16sf) __C, 2761288943Sdim (__mmask16) -1, 2762288943Sdim _MM_FROUND_CUR_DIRECTION); 2763277325Sdim} 2764277325Sdim 2765341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2766288943Sdim_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 2767288943Sdim{ 2768288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2769288943Sdim (__v16sf) __B, 2770288943Sdim -(__v16sf) __C, 2771288943Sdim (__mmask16) __U, 2772288943Sdim _MM_FROUND_CUR_DIRECTION); 2773288943Sdim} 2774288943Sdim 2775341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2776288943Sdim_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2777288943Sdim{ 2778288943Sdim return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 2779288943Sdim (__v16sf) __B, 2780288943Sdim -(__v16sf) __C, 2781288943Sdim (__mmask16) __U, 2782288943Sdim _MM_FROUND_CUR_DIRECTION); 2783288943Sdim} 2784288943Sdim 2785341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2786277325Sdim_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) 2787277325Sdim{ 2788341825Sdim return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) 
__A, 2789341825Sdim -(__v16sf) __B, 2790288943Sdim (__v16sf) __C, 2791288943Sdim (__mmask16) -1, 2792288943Sdim _MM_FROUND_CUR_DIRECTION); 2793277325Sdim} 2794277325Sdim 2795341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2796288943Sdim_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 2797288943Sdim{ 2798288943Sdim return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, 2799288943Sdim (__v16sf) __B, 2800288943Sdim (__v16sf) __C, 2801288943Sdim (__mmask16) __U, 2802288943Sdim _MM_FROUND_CUR_DIRECTION); 2803288943Sdim} 2804288943Sdim 2805341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2806288943Sdim_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2807288943Sdim{ 2808288943Sdim return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 2809288943Sdim (__v16sf) __B, 2810288943Sdim (__v16sf) __C, 2811288943Sdim (__mmask16) __U, 2812288943Sdim _MM_FROUND_CUR_DIRECTION); 2813288943Sdim} 2814288943Sdim 2815341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2816288943Sdim_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) 2817288943Sdim{ 2818341825Sdim return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2819341825Sdim -(__v16sf) __B, 2820288943Sdim -(__v16sf) __C, 2821288943Sdim (__mmask16) -1, 2822288943Sdim _MM_FROUND_CUR_DIRECTION); 2823288943Sdim} 2824288943Sdim 2825341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 2826288943Sdim_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2827288943Sdim{ 2828288943Sdim return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 2829288943Sdim (__v16sf) __B, 2830288943Sdim -(__v16sf) __C, 2831288943Sdim (__mmask16) __U, 2832288943Sdim _MM_FROUND_CUR_DIRECTION); 2833288943Sdim} 2834288943Sdim 2835341825Sdim#define _mm512_fmaddsub_round_pd(A, B, C, R) \ 2836309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 2837309124Sdim (__v8df)(__m512d)(B), \ 2838309124Sdim 
(__v8df)(__m512d)(C), \ 2839341825Sdim (__mmask8)-1, (int)(R)) 2840288943Sdim 2841288943Sdim 2842341825Sdim#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \ 2843309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 2844309124Sdim (__v8df)(__m512d)(B), \ 2845309124Sdim (__v8df)(__m512d)(C), \ 2846341825Sdim (__mmask8)(U), (int)(R)) 2847288943Sdim 2848288943Sdim 2849341825Sdim#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \ 2850309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \ 2851309124Sdim (__v8df)(__m512d)(B), \ 2852309124Sdim (__v8df)(__m512d)(C), \ 2853341825Sdim (__mmask8)(U), (int)(R)) 2854288943Sdim 2855288943Sdim 2856341825Sdim#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \ 2857309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ 2858309124Sdim (__v8df)(__m512d)(B), \ 2859309124Sdim (__v8df)(__m512d)(C), \ 2860341825Sdim (__mmask8)(U), (int)(R)) 2861288943Sdim 2862288943Sdim 2863341825Sdim#define _mm512_fmsubadd_round_pd(A, B, C, R) \ 2864309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 2865309124Sdim (__v8df)(__m512d)(B), \ 2866309124Sdim -(__v8df)(__m512d)(C), \ 2867341825Sdim (__mmask8)-1, (int)(R)) 2868288943Sdim 2869288943Sdim 2870341825Sdim#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \ 2871309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 2872309124Sdim (__v8df)(__m512d)(B), \ 2873309124Sdim -(__v8df)(__m512d)(C), \ 2874341825Sdim (__mmask8)(U), (int)(R)) 2875288943Sdim 2876288943Sdim 2877341825Sdim#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \ 2878309124Sdim (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ 2879309124Sdim (__v8df)(__m512d)(B), \ 2880309124Sdim -(__v8df)(__m512d)(C), \ 2881341825Sdim (__mmask8)(U), (int)(R)) 2882288943Sdim 2883288943Sdim 2884341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2885288943Sdim_mm512_fmaddsub_pd(__m512d 
__A, __m512d __B, __m512d __C) 2886288943Sdim{ 2887288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 2888341825Sdim (__v8df) __B, 2889341825Sdim (__v8df) __C, 2890341825Sdim (__mmask8) -1, 2891341825Sdim _MM_FROUND_CUR_DIRECTION); 2892288943Sdim} 2893288943Sdim 2894341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2895288943Sdim_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 2896288943Sdim{ 2897288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 2898341825Sdim (__v8df) __B, 2899341825Sdim (__v8df) __C, 2900341825Sdim (__mmask8) __U, 2901341825Sdim _MM_FROUND_CUR_DIRECTION); 2902288943Sdim} 2903288943Sdim 2904341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2905288943Sdim_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 2906288943Sdim{ 2907288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, 2908341825Sdim (__v8df) __B, 2909341825Sdim (__v8df) __C, 2910341825Sdim (__mmask8) __U, 2911341825Sdim _MM_FROUND_CUR_DIRECTION); 2912288943Sdim} 2913288943Sdim 2914341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2915288943Sdim_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2916288943Sdim{ 2917288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 2918341825Sdim (__v8df) __B, 2919341825Sdim (__v8df) __C, 2920341825Sdim (__mmask8) __U, 2921341825Sdim _MM_FROUND_CUR_DIRECTION); 2922288943Sdim} 2923288943Sdim 2924341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2925288943Sdim_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) 2926288943Sdim{ 2927288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 2928288943Sdim (__v8df) __B, 2929288943Sdim -(__v8df) __C, 2930288943Sdim (__mmask8) -1, 2931288943Sdim _MM_FROUND_CUR_DIRECTION); 2932288943Sdim} 2933288943Sdim 2934341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 
2935288943Sdim_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 2936288943Sdim{ 2937288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 2938288943Sdim (__v8df) __B, 2939288943Sdim -(__v8df) __C, 2940288943Sdim (__mmask8) __U, 2941288943Sdim _MM_FROUND_CUR_DIRECTION); 2942288943Sdim} 2943288943Sdim 2944341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 2945288943Sdim_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2946288943Sdim{ 2947288943Sdim return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 2948288943Sdim (__v8df) __B, 2949288943Sdim -(__v8df) __C, 2950288943Sdim (__mmask8) __U, 2951288943Sdim _MM_FROUND_CUR_DIRECTION); 2952288943Sdim} 2953288943Sdim 2954341825Sdim#define _mm512_fmaddsub_round_ps(A, B, C, R) \ 2955309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 2956309124Sdim (__v16sf)(__m512)(B), \ 2957309124Sdim (__v16sf)(__m512)(C), \ 2958341825Sdim (__mmask16)-1, (int)(R)) 2959288943Sdim 2960288943Sdim 2961341825Sdim#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \ 2962309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 2963309124Sdim (__v16sf)(__m512)(B), \ 2964309124Sdim (__v16sf)(__m512)(C), \ 2965341825Sdim (__mmask16)(U), (int)(R)) 2966288943Sdim 2967288943Sdim 2968341825Sdim#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \ 2969309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \ 2970309124Sdim (__v16sf)(__m512)(B), \ 2971309124Sdim (__v16sf)(__m512)(C), \ 2972341825Sdim (__mmask16)(U), (int)(R)) 2973288943Sdim 2974288943Sdim 2975341825Sdim#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \ 2976309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ 2977309124Sdim (__v16sf)(__m512)(B), \ 2978309124Sdim (__v16sf)(__m512)(C), \ 2979341825Sdim (__mmask16)(U), (int)(R)) 2980288943Sdim 2981288943Sdim 2982341825Sdim#define 
_mm512_fmsubadd_round_ps(A, B, C, R) \ 2983309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 2984309124Sdim (__v16sf)(__m512)(B), \ 2985309124Sdim -(__v16sf)(__m512)(C), \ 2986341825Sdim (__mmask16)-1, (int)(R)) 2987288943Sdim 2988288943Sdim 2989341825Sdim#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \ 2990309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 2991309124Sdim (__v16sf)(__m512)(B), \ 2992309124Sdim -(__v16sf)(__m512)(C), \ 2993341825Sdim (__mmask16)(U), (int)(R)) 2994288943Sdim 2995288943Sdim 2996341825Sdim#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \ 2997309124Sdim (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ 2998309124Sdim (__v16sf)(__m512)(B), \ 2999309124Sdim -(__v16sf)(__m512)(C), \ 3000341825Sdim (__mmask16)(U), (int)(R)) 3001288943Sdim 3002288943Sdim 3003341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3004288943Sdim_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) 3005288943Sdim{ 3006288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3007288943Sdim (__v16sf) __B, 3008288943Sdim (__v16sf) __C, 3009288943Sdim (__mmask16) -1, 3010288943Sdim _MM_FROUND_CUR_DIRECTION); 3011288943Sdim} 3012288943Sdim 3013341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3014288943Sdim_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 3015288943Sdim{ 3016288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3017288943Sdim (__v16sf) __B, 3018288943Sdim (__v16sf) __C, 3019288943Sdim (__mmask16) __U, 3020288943Sdim _MM_FROUND_CUR_DIRECTION); 3021288943Sdim} 3022288943Sdim 3023341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3024288943Sdim_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 3025288943Sdim{ 3026288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, 3027288943Sdim (__v16sf) __B, 3028288943Sdim (__v16sf) __C, 
3029288943Sdim (__mmask16) __U, 3030288943Sdim _MM_FROUND_CUR_DIRECTION); 3031288943Sdim} 3032288943Sdim 3033341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3034288943Sdim_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 3035288943Sdim{ 3036288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 3037288943Sdim (__v16sf) __B, 3038288943Sdim (__v16sf) __C, 3039288943Sdim (__mmask16) __U, 3040288943Sdim _MM_FROUND_CUR_DIRECTION); 3041288943Sdim} 3042288943Sdim 3043341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3044288943Sdim_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) 3045288943Sdim{ 3046288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3047288943Sdim (__v16sf) __B, 3048288943Sdim -(__v16sf) __C, 3049288943Sdim (__mmask16) -1, 3050288943Sdim _MM_FROUND_CUR_DIRECTION); 3051288943Sdim} 3052288943Sdim 3053341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3054288943Sdim_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 3055288943Sdim{ 3056288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3057288943Sdim (__v16sf) __B, 3058288943Sdim -(__v16sf) __C, 3059288943Sdim (__mmask16) __U, 3060288943Sdim _MM_FROUND_CUR_DIRECTION); 3061288943Sdim} 3062288943Sdim 3063341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3064288943Sdim_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 3065288943Sdim{ 3066288943Sdim return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 3067288943Sdim (__v16sf) __B, 3068288943Sdim -(__v16sf) __C, 3069288943Sdim (__mmask16) __U, 3070288943Sdim _MM_FROUND_CUR_DIRECTION); 3071288943Sdim} 3072288943Sdim 3073341825Sdim#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \ 3074309124Sdim (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \ 3075309124Sdim (__v8df)(__m512d)(B), \ 3076309124Sdim (__v8df)(__m512d)(C), \ 3077341825Sdim (__mmask8)(U), 
(int)(R)) 3078288943Sdim 3079288943Sdim 3080341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 3081288943Sdim_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 3082288943Sdim{ 3083341825Sdim return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, 3084341825Sdim (__v8df) __B, 3085341825Sdim (__v8df) __C, 3086341825Sdim (__mmask8) __U, 3087341825Sdim _MM_FROUND_CUR_DIRECTION); 3088288943Sdim} 3089288943Sdim 3090341825Sdim#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \ 3091309124Sdim (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \ 3092309124Sdim (__v16sf)(__m512)(B), \ 3093309124Sdim (__v16sf)(__m512)(C), \ 3094341825Sdim (__mmask16)(U), (int)(R)) 3095288943Sdim 3096341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3097288943Sdim_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 3098288943Sdim{ 3099341825Sdim return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, 3100341825Sdim (__v16sf) __B, 3101341825Sdim (__v16sf) __C, 3102341825Sdim (__mmask16) __U, 3103341825Sdim _MM_FROUND_CUR_DIRECTION); 3104288943Sdim} 3105288943Sdim 3106341825Sdim#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \ 3107309124Sdim (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \ 3108309124Sdim (__v8df)(__m512d)(B), \ 3109309124Sdim (__v8df)(__m512d)(C), \ 3110341825Sdim (__mmask8)(U), (int)(R)) 3111288943Sdim 3112288943Sdim 3113341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 3114288943Sdim_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 3115288943Sdim{ 3116341825Sdim return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, 3117341825Sdim (__v8df) __B, 3118341825Sdim (__v8df) __C, 3119341825Sdim (__mmask8) __U, 3120341825Sdim _MM_FROUND_CUR_DIRECTION); 3121288943Sdim} 3122288943Sdim 3123341825Sdim#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \ 3124309124Sdim 
(__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \ 3125309124Sdim (__v16sf)(__m512)(B), \ 3126309124Sdim (__v16sf)(__m512)(C), \ 3127341825Sdim (__mmask16)(U), (int)(R)) 3128288943Sdim 3129288943Sdim 3130341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3131288943Sdim_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 3132288943Sdim{ 3133341825Sdim return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, 3134341825Sdim (__v16sf) __B, 3135341825Sdim (__v16sf) __C, 3136341825Sdim (__mmask16) __U, 3137341825Sdim _MM_FROUND_CUR_DIRECTION); 3138288943Sdim} 3139288943Sdim 3140341825Sdim#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \ 3141341825Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 3142341825Sdim -(__v8df)(__m512d)(B), \ 3143341825Sdim (__v8df)(__m512d)(C), \ 3144341825Sdim (__mmask8)(U), (int)(R)) 3145288943Sdim 3146288943Sdim 3147341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 3148288943Sdim_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 3149288943Sdim{ 3150341825Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 3151341825Sdim -(__v8df) __B, 3152341825Sdim (__v8df) __C, 3153341825Sdim (__mmask8) __U, 3154341825Sdim _MM_FROUND_CUR_DIRECTION); 3155288943Sdim} 3156288943Sdim 3157341825Sdim#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \ 3158341825Sdim (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 3159341825Sdim -(__v16sf)(__m512)(B), \ 3160341825Sdim (__v16sf)(__m512)(C), \ 3161341825Sdim (__mmask16)(U), (int)(R)) 3162288943Sdim 3163288943Sdim 3164341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3165288943Sdim_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 3166288943Sdim{ 3167341825Sdim return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 3168341825Sdim -(__v16sf) __B, 3169341825Sdim (__v16sf) __C, 3170341825Sdim (__mmask16) __U, 3171341825Sdim 
_MM_FROUND_CUR_DIRECTION); 3172288943Sdim} 3173288943Sdim 3174341825Sdim#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \ 3175341825Sdim (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 3176341825Sdim -(__v8df)(__m512d)(B), \ 3177341825Sdim -(__v8df)(__m512d)(C), \ 3178341825Sdim (__mmask8)(U), (int)(R)) 3179341825Sdim 3180341825Sdim 3181341825Sdim#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \ 3182341825Sdim (__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \ 3183309124Sdim (__v8df)(__m512d)(B), \ 3184309124Sdim (__v8df)(__m512d)(C), \ 3185341825Sdim (__mmask8)(U), (int)(R)) 3186288943Sdim 3187288943Sdim 3188341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 3189288943Sdim_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 3190288943Sdim{ 3191341825Sdim return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 3192341825Sdim -(__v8df) __B, 3193341825Sdim -(__v8df) __C, 3194341825Sdim (__mmask8) __U, 3195341825Sdim _MM_FROUND_CUR_DIRECTION); 3196341825Sdim} 3197341825Sdim 3198341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 3199341825Sdim_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 3200341825Sdim{ 3201341825Sdim return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A, 3202288943Sdim (__v8df) __B, 3203288943Sdim (__v8df) __C, 3204288943Sdim (__mmask8) __U, 3205288943Sdim _MM_FROUND_CUR_DIRECTION); 3206288943Sdim} 3207288943Sdim 3208341825Sdim#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \ 3209341825Sdim (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 3210341825Sdim -(__v16sf)(__m512)(B), \ 3211341825Sdim -(__v16sf)(__m512)(C), \ 3212341825Sdim (__mmask16)(U), (int)(R)) 3213288943Sdim 3214341825Sdim 3215341825Sdim#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \ 3216341825Sdim (__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \ 3217309124Sdim (__v16sf)(__m512)(B), \ 3218309124Sdim (__v16sf)(__m512)(C), \ 
3219341825Sdim (__mmask16)(U), (int)(R)) 3220288943Sdim 3221288943Sdim 3222341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3223288943Sdim_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 3224288943Sdim{ 3225341825Sdim return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 3226341825Sdim -(__v16sf) __B, 3227341825Sdim -(__v16sf) __C, 3228341825Sdim (__mmask16) __U, 3229341825Sdim _MM_FROUND_CUR_DIRECTION); 3230341825Sdim} 3231341825Sdim 3232341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3233341825Sdim_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 3234341825Sdim{ 3235341825Sdim return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A, 3236288943Sdim (__v16sf) __B, 3237288943Sdim (__v16sf) __C, 3238288943Sdim (__mmask16) __U, 3239288943Sdim _MM_FROUND_CUR_DIRECTION); 3240288943Sdim} 3241288943Sdim 3242288943Sdim 3243288943Sdim 3244277325Sdim/* Vector permutations */ 3245277325Sdim 3246341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 3247277325Sdim_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) 3248277325Sdim{ 3249341825Sdim return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I, 3250341825Sdim (__v16si) __B); 3251277325Sdim} 3252309124Sdim 3253341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 3254341825Sdim_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, 3255341825Sdim __m512i __B) 3256309124Sdim{ 3257341825Sdim return (__m512i)__builtin_ia32_selectd_512(__U, 3258341825Sdim (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), 3259341825Sdim (__v16si)__A); 3260309124Sdim} 3261309124Sdim 3262341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 3263341825Sdim_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, 3264341825Sdim __m512i __B) 3265309124Sdim{ 3266341825Sdim return (__m512i)__builtin_ia32_selectd_512(__U, 3267341825Sdim (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), 
3268341825Sdim (__v16si)__I); 3269309124Sdim} 3270309124Sdim 3271341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 3272341825Sdim_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, 3273341825Sdim __m512i __B) 3274341825Sdim{ 3275341825Sdim return (__m512i)__builtin_ia32_selectd_512(__U, 3276341825Sdim (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), 3277341825Sdim (__v16si)_mm512_setzero_si512()); 3278341825Sdim} 3279341825Sdim 3280341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 3281277325Sdim_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) 3282277325Sdim{ 3283341825Sdim return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I, 3284341825Sdim (__v8di) __B); 3285277325Sdim} 3286277325Sdim 3287341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 3288341825Sdim_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, 3289341825Sdim __m512i __B) 3290341825Sdim{ 3291341825Sdim return (__m512i)__builtin_ia32_selectq_512(__U, 3292341825Sdim (__v8di)_mm512_permutex2var_epi64(__A, __I, __B), 3293341825Sdim (__v8di)__A); 3294341825Sdim} 3295341825Sdim 3296341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 3297341825Sdim_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, 3298309124Sdim __m512i __B) 3299277325Sdim{ 3300341825Sdim return (__m512i)__builtin_ia32_selectq_512(__U, 3301341825Sdim (__v8di)_mm512_permutex2var_epi64(__A, __I, __B), 3302341825Sdim (__v8di)__I); 3303277325Sdim} 3304309124Sdim 3305341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 3306341825Sdim_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, 3307341825Sdim __m512i __B) 3308277325Sdim{ 3309341825Sdim return (__m512i)__builtin_ia32_selectq_512(__U, 3310341825Sdim (__v8di)_mm512_permutex2var_epi64(__A, __I, __B), 3311341825Sdim (__v8di)_mm512_setzero_si512()); 3312277325Sdim} 3313277325Sdim 3314341825Sdim#define _mm512_alignr_epi64(A, B, I) \ 3315341825Sdim 
(__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                    (__v8di)(__m512i)(B), (int)(I))

/* Masked / zero-masked ALIGNR on 64-bit elements: the unmasked macro result
 * is fed to the 64-bit select builtin together with W (or an all-zero
 * vector) under mask U.  Every expansion below is wrapped in parentheses so
 * that operators applied to the macro result act on the cast value. */
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()))

/* ALIGNR on 32-bit elements; I is passed to the builtin as an int. */
#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
/* Vector Extract */

/* Extract four doubles from A; I is passed to the builtin as an int.  The
 * unmasked form passes an undefined pass-through vector and an all-ones
 * mask. */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                             (__v4df)_mm256_undefined_pd(), \
                                             (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
3355341825Sdim (__v4df)_mm256_setzero_pd(), \ 3356341825Sdim (__mmask8)(U)) 3357309124Sdim 3358341825Sdim#define _mm512_extractf32x4_ps(A, I) \ 3359341825Sdim (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \ 3360341825Sdim (__v4sf)_mm_undefined_ps(), \ 3361341825Sdim (__mmask8)-1) 3362288943Sdim 3363341825Sdim#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \ 3364341825Sdim (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ 3365341825Sdim (__v4sf)(__m128)(W), \ 3366341825Sdim (__mmask8)(U)) 3367309124Sdim 3368341825Sdim#define _mm512_maskz_extractf32x4_ps(U, A, imm) \ 3369341825Sdim (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ 3370341825Sdim (__v4sf)_mm_setzero_ps(), \ 3371341825Sdim (__mmask8)(U)) 3372314564Sdim 3373277325Sdim/* Vector Blend */ 3374277325Sdim 3375341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 3376277325Sdim_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) 3377277325Sdim{ 3378309124Sdim return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, 3379277325Sdim (__v8df) __W, 3380309124Sdim (__v8df) __A); 3381277325Sdim} 3382277325Sdim 3383341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 3384277325Sdim_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) 3385277325Sdim{ 3386309124Sdim return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, 3387277325Sdim (__v16sf) __W, 3388309124Sdim (__v16sf) __A); 3389277325Sdim} 3390277325Sdim 3391341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 3392277325Sdim_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) 3393277325Sdim{ 3394309124Sdim return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, 3395277325Sdim (__v8di) __W, 3396309124Sdim (__v8di) __A); 3397277325Sdim} 3398277325Sdim 3399341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 3400277325Sdim_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) 3401277325Sdim{ 3402309124Sdim return (__m512i) 
__builtin_ia32_selectd_512 ((__mmask16) __U, 3403277325Sdim (__v16si) __W, 3404309124Sdim (__v16si) __A); 3405277325Sdim} 3406277325Sdim 3407277325Sdim/* Compare */ 3408277325Sdim 3409341825Sdim#define _mm512_cmp_round_ps_mask(A, B, P, R) \ 3410288943Sdim (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 3411309124Sdim (__v16sf)(__m512)(B), (int)(P), \ 3412341825Sdim (__mmask16)-1, (int)(R)) 3413277325Sdim 3414341825Sdim#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \ 3415288943Sdim (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 3416309124Sdim (__v16sf)(__m512)(B), (int)(P), \ 3417341825Sdim (__mmask16)(U), (int)(R)) 3418277325Sdim 3419288943Sdim#define _mm512_cmp_ps_mask(A, B, P) \ 3420288943Sdim _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3421288943Sdim#define _mm512_mask_cmp_ps_mask(U, A, B, P) \ 3422288943Sdim _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3423288943Sdim 3424314564Sdim#define _mm512_cmpeq_ps_mask(A, B) \ 3425314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ) 3426314564Sdim#define _mm512_mask_cmpeq_ps_mask(k, A, B) \ 3427314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ) 3428314564Sdim 3429314564Sdim#define _mm512_cmplt_ps_mask(A, B) \ 3430314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS) 3431314564Sdim#define _mm512_mask_cmplt_ps_mask(k, A, B) \ 3432314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS) 3433314564Sdim 3434314564Sdim#define _mm512_cmple_ps_mask(A, B) \ 3435314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS) 3436314564Sdim#define _mm512_mask_cmple_ps_mask(k, A, B) \ 3437314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS) 3438314564Sdim 3439314564Sdim#define _mm512_cmpunord_ps_mask(A, B) \ 3440314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q) 3441314564Sdim#define _mm512_mask_cmpunord_ps_mask(k, A, B) \ 3442314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q) 3443314564Sdim 3444314564Sdim#define 
_mm512_cmpneq_ps_mask(A, B) \ 3445314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ) 3446314564Sdim#define _mm512_mask_cmpneq_ps_mask(k, A, B) \ 3447314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ) 3448314564Sdim 3449314564Sdim#define _mm512_cmpnlt_ps_mask(A, B) \ 3450314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US) 3451314564Sdim#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \ 3452314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US) 3453314564Sdim 3454314564Sdim#define _mm512_cmpnle_ps_mask(A, B) \ 3455314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US) 3456314564Sdim#define _mm512_mask_cmpnle_ps_mask(k, A, B) \ 3457314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US) 3458314564Sdim 3459314564Sdim#define _mm512_cmpord_ps_mask(A, B) \ 3460314564Sdim _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q) 3461314564Sdim#define _mm512_mask_cmpord_ps_mask(k, A, B) \ 3462314564Sdim _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q) 3463314564Sdim 3464341825Sdim#define _mm512_cmp_round_pd_mask(A, B, P, R) \ 3465288943Sdim (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ 3466309124Sdim (__v8df)(__m512d)(B), (int)(P), \ 3467341825Sdim (__mmask8)-1, (int)(R)) 3468288943Sdim 3469341825Sdim#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \ 3470288943Sdim (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ 3471309124Sdim (__v8df)(__m512d)(B), (int)(P), \ 3472341825Sdim (__mmask8)(U), (int)(R)) 3473288943Sdim 3474288943Sdim#define _mm512_cmp_pd_mask(A, B, P) \ 3475288943Sdim _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3476288943Sdim#define _mm512_mask_cmp_pd_mask(U, A, B, P) \ 3477288943Sdim _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3478288943Sdim 3479314564Sdim#define _mm512_cmpeq_pd_mask(A, B) \ 3480314564Sdim _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ) 3481314564Sdim#define _mm512_mask_cmpeq_pd_mask(k, A, B) \ 3482314564Sdim _mm512_mask_cmp_pd_mask((k), (A), (B), 
_CMP_EQ_OQ)

/* Fixed-predicate shorthands for the packed-double compare; each one names
 * the _CMP_* constant it uses.  The expansions are parenthesized so the
 * result stays a single operand regardless of how the underlying compare
 * macro is written. */
#define _mm512_cmplt_pd_mask(A, B) \
    (_mm512_cmp_pd_mask((A), (B), _CMP_LT_OS))
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS))

#define _mm512_cmple_pd_mask(A, B) \
    (_mm512_cmp_pd_mask((A), (B), _CMP_LE_OS))
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS))

#define _mm512_cmpunord_pd_mask(A, B) \
    (_mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q))
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q))

#define _mm512_cmpneq_pd_mask(A, B) \
    (_mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ))
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ))

#define _mm512_cmpnlt_pd_mask(A, B) \
    (_mm512_cmp_pd_mask((A), (B), _CMP_NLT_US))
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US))

#define _mm512_cmpnle_pd_mask(A, B) \
    (_mm512_cmp_pd_mask((A), (B), _CMP_NLE_US))
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US))

#define _mm512_cmpord_pd_mask(A, B) \
    (_mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q))
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q))

/* Conversion */

#define _mm512_cvtt_roundps_epu32(A, R) \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
(__v16si)_mm512_undefined_epi32(), \ 3524341825Sdim (__mmask16)-1, (int)(R)) 3525309124Sdim 3526341825Sdim#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \ 3527309124Sdim (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ 3528309124Sdim (__v16si)(__m512i)(W), \ 3529341825Sdim (__mmask16)(U), (int)(R)) 3530309124Sdim 3531341825Sdim#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \ 3532309124Sdim (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ 3533309124Sdim (__v16si)_mm512_setzero_si512(), \ 3534341825Sdim (__mmask16)(U), (int)(R)) 3535309124Sdim 3536309124Sdim 3537341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 3538277325Sdim_mm512_cvttps_epu32(__m512 __A) 3539277325Sdim{ 3540277325Sdim return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 3541277325Sdim (__v16si) 3542277325Sdim _mm512_setzero_si512 (), 3543277325Sdim (__mmask16) -1, 3544277325Sdim _MM_FROUND_CUR_DIRECTION); 3545277325Sdim} 3546277325Sdim 3547341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 3548309124Sdim_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) 3549309124Sdim{ 3550309124Sdim return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 3551309124Sdim (__v16si) __W, 3552309124Sdim (__mmask16) __U, 3553309124Sdim _MM_FROUND_CUR_DIRECTION); 3554309124Sdim} 3555309124Sdim 3556341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 3557309124Sdim_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) 3558309124Sdim{ 3559309124Sdim return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 3560309124Sdim (__v16si) _mm512_setzero_si512 (), 3561309124Sdim (__mmask16) __U, 3562309124Sdim _MM_FROUND_CUR_DIRECTION); 3563309124Sdim} 3564309124Sdim 3565341825Sdim#define _mm512_cvt_roundepi32_ps(A, R) \ 3566309124Sdim (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 3567288943Sdim (__v16sf)_mm512_setzero_ps(), \ 3568341825Sdim (__mmask16)-1, (int)(R)) 3569277325Sdim 3570341825Sdim#define 
_mm512_mask_cvt_roundepi32_ps(W, U, A, R) \ 3571309124Sdim (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 3572309124Sdim (__v16sf)(__m512)(W), \ 3573341825Sdim (__mmask16)(U), (int)(R)) 3574309124Sdim 3575341825Sdim#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \ 3576309124Sdim (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 3577309124Sdim (__v16sf)_mm512_setzero_ps(), \ 3578341825Sdim (__mmask16)(U), (int)(R)) 3579309124Sdim 3580341825Sdim#define _mm512_cvt_roundepu32_ps(A, R) \ 3581309124Sdim (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 3582288943Sdim (__v16sf)_mm512_setzero_ps(), \ 3583341825Sdim (__mmask16)-1, (int)(R)) 3584277325Sdim 3585341825Sdim#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \ 3586309124Sdim (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 3587309124Sdim (__v16sf)(__m512)(W), \ 3588341825Sdim (__mmask16)(U), (int)(R)) 3589309124Sdim 3590341825Sdim#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \ 3591309124Sdim (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 3592309124Sdim (__v16sf)_mm512_setzero_ps(), \ 3593341825Sdim (__mmask16)(U), (int)(R)) 3594309124Sdim 3595341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3596309124Sdim_mm512_cvtepu32_ps (__m512i __A) 3597309124Sdim{ 3598341825Sdim return (__m512)__builtin_convertvector((__v16su)__A, __v16sf); 3599309124Sdim} 3600309124Sdim 3601341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3602309124Sdim_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) 3603309124Sdim{ 3604341825Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 3605341825Sdim (__v16sf)_mm512_cvtepu32_ps(__A), 3606341825Sdim (__v16sf)__W); 3607309124Sdim} 3608309124Sdim 3609341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3610309124Sdim_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) 3611309124Sdim{ 3612341825Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 
3613341825Sdim (__v16sf)_mm512_cvtepu32_ps(__A), 3614341825Sdim (__v16sf)_mm512_setzero_ps()); 3615309124Sdim} 3616309124Sdim 3617341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 3618277325Sdim_mm512_cvtepi32_pd(__m256i __A) 3619277325Sdim{ 3620314564Sdim return (__m512d)__builtin_convertvector((__v8si)__A, __v8df); 3621277325Sdim} 3622277325Sdim 3623341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 3624309124Sdim_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) 3625309124Sdim{ 3626314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 3627314564Sdim (__v8df)_mm512_cvtepi32_pd(__A), 3628314564Sdim (__v8df)__W); 3629309124Sdim} 3630309124Sdim 3631341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 3632309124Sdim_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) 3633309124Sdim{ 3634314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 3635314564Sdim (__v8df)_mm512_cvtepi32_pd(__A), 3636314564Sdim (__v8df)_mm512_setzero_pd()); 3637309124Sdim} 3638309124Sdim 3639341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 3640314564Sdim_mm512_cvtepi32lo_pd(__m512i __A) 3641314564Sdim{ 3642314564Sdim return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A)); 3643314564Sdim} 3644314564Sdim 3645341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 3646314564Sdim_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) 3647314564Sdim{ 3648314564Sdim return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A)); 3649314564Sdim} 3650314564Sdim 3651341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3652309124Sdim_mm512_cvtepi32_ps (__m512i __A) 3653309124Sdim{ 3654341825Sdim return (__m512)__builtin_convertvector((__v16si)__A, __v16sf); 3655309124Sdim} 3656309124Sdim 3657341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3658309124Sdim_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) 3659309124Sdim{ 3660341825Sdim return 
(__m512)__builtin_ia32_selectps_512((__mmask16)__U, 3661341825Sdim (__v16sf)_mm512_cvtepi32_ps(__A), 3662341825Sdim (__v16sf)__W); 3663309124Sdim} 3664309124Sdim 3665341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3666309124Sdim_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) 3667309124Sdim{ 3668341825Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 3669341825Sdim (__v16sf)_mm512_cvtepi32_ps(__A), 3670341825Sdim (__v16sf)_mm512_setzero_ps()); 3671309124Sdim} 3672309124Sdim 3673341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 3674277325Sdim_mm512_cvtepu32_pd(__m256i __A) 3675277325Sdim{ 3676314564Sdim return (__m512d)__builtin_convertvector((__v8su)__A, __v8df); 3677277325Sdim} 3678277325Sdim 3679341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 3680309124Sdim_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) 3681309124Sdim{ 3682314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 3683314564Sdim (__v8df)_mm512_cvtepu32_pd(__A), 3684314564Sdim (__v8df)__W); 3685309124Sdim} 3686309124Sdim 3687341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 3688309124Sdim_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) 3689309124Sdim{ 3690314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 3691314564Sdim (__v8df)_mm512_cvtepu32_pd(__A), 3692314564Sdim (__v8df)_mm512_setzero_pd()); 3693309124Sdim} 3694309124Sdim 3695341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 3696314564Sdim_mm512_cvtepu32lo_pd(__m512i __A) 3697314564Sdim{ 3698314564Sdim return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A)); 3699314564Sdim} 3700314564Sdim 3701341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 3702314564Sdim_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) 3703314564Sdim{ 3704314564Sdim return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A)); 3705314564Sdim} 3706314564Sdim 3707341825Sdim#define _mm512_cvt_roundpd_ps(A, R) \ 
3708309124Sdim (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3709288943Sdim (__v8sf)_mm256_setzero_ps(), \ 3710341825Sdim (__mmask8)-1, (int)(R)) 3711277325Sdim 3712341825Sdim#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \ 3713309124Sdim (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3714309124Sdim (__v8sf)(__m256)(W), (__mmask8)(U), \ 3715341825Sdim (int)(R)) 3716309124Sdim 3717341825Sdim#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \ 3718309124Sdim (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3719309124Sdim (__v8sf)_mm256_setzero_ps(), \ 3720341825Sdim (__mmask8)(U), (int)(R)) 3721309124Sdim 3722341825Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS512 3723309124Sdim_mm512_cvtpd_ps (__m512d __A) 3724309124Sdim{ 3725309124Sdim return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 3726309124Sdim (__v8sf) _mm256_undefined_ps (), 3727309124Sdim (__mmask8) -1, 3728309124Sdim _MM_FROUND_CUR_DIRECTION); 3729309124Sdim} 3730309124Sdim 3731341825Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS512 3732309124Sdim_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) 3733309124Sdim{ 3734309124Sdim return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 3735309124Sdim (__v8sf) __W, 3736309124Sdim (__mmask8) __U, 3737309124Sdim _MM_FROUND_CUR_DIRECTION); 3738309124Sdim} 3739309124Sdim 3740341825Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS512 3741309124Sdim_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) 3742309124Sdim{ 3743309124Sdim return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 3744309124Sdim (__v8sf) _mm256_setzero_ps (), 3745309124Sdim (__mmask8) __U, 3746309124Sdim _MM_FROUND_CUR_DIRECTION); 3747309124Sdim} 3748309124Sdim 3749341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3750314564Sdim_mm512_cvtpd_pslo (__m512d __A) 3751314564Sdim{ 3752314564Sdim return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A), 3753314564Sdim (__v8sf) _mm256_setzero_ps (), 
3754314564Sdim 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 3755314564Sdim} 3756314564Sdim 3757341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3758314564Sdim_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A) 3759314564Sdim{ 3760314564Sdim return (__m512) __builtin_shufflevector ( 3761314564Sdim (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W), 3762314564Sdim __U, __A), 3763314564Sdim (__v8sf) _mm256_setzero_ps (), 3764314564Sdim 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 3765314564Sdim} 3766314564Sdim 3767341825Sdim#define _mm512_cvt_roundps_ph(A, I) \ 3768309124Sdim (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3769309124Sdim (__v16hi)_mm256_undefined_si256(), \ 3770341825Sdim (__mmask16)-1) 3771309124Sdim 3772341825Sdim#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \ 3773309124Sdim (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3774309124Sdim (__v16hi)(__m256i)(U), \ 3775341825Sdim (__mmask16)(W)) 3776309124Sdim 3777341825Sdim#define _mm512_maskz_cvt_roundps_ph(W, A, I) \ 3778309124Sdim (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3779309124Sdim (__v16hi)_mm256_setzero_si256(), \ 3780341825Sdim (__mmask16)(W)) 3781309124Sdim 3782353358Sdim#define _mm512_cvtps_ph _mm512_cvt_roundps_ph 3783353358Sdim#define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph 3784353358Sdim#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph 3785288943Sdim 3786341825Sdim#define _mm512_cvt_roundph_ps(A, R) \ 3787309124Sdim (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 3788309124Sdim (__v16sf)_mm512_undefined_ps(), \ 3789341825Sdim (__mmask16)-1, (int)(R)) 3790309124Sdim 3791341825Sdim#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \ 3792309124Sdim (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 3793309124Sdim (__v16sf)(__m512)(W), \ 3794341825Sdim (__mmask16)(U), (int)(R)) 3795309124Sdim 
3796341825Sdim#define _mm512_maskz_cvt_roundph_ps(U, A, R) \ 3797309124Sdim (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 3798309124Sdim (__v16sf)_mm512_setzero_ps(), \ 3799341825Sdim (__mmask16)(U), (int)(R)) 3800309124Sdim 3801309124Sdim 3802341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 3803277325Sdim_mm512_cvtph_ps(__m256i __A) 3804277325Sdim{ 3805277325Sdim return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 3806277325Sdim (__v16sf) 3807277325Sdim _mm512_setzero_ps (), 3808277325Sdim (__mmask16) -1, 3809277325Sdim _MM_FROUND_CUR_DIRECTION); 3810277325Sdim} 3811277325Sdim 3812341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3813309124Sdim_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) 3814277325Sdim{ 3815309124Sdim return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 3816309124Sdim (__v16sf) __W, 3817309124Sdim (__mmask16) __U, 3818309124Sdim _MM_FROUND_CUR_DIRECTION); 3819277325Sdim} 3820277325Sdim 3821341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 3822309124Sdim_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) 3823309124Sdim{ 3824309124Sdim return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 3825309124Sdim (__v16sf) _mm512_setzero_ps (), 3826309124Sdim (__mmask16) __U, 3827309124Sdim _MM_FROUND_CUR_DIRECTION); 3828309124Sdim} 3829309124Sdim 3830341825Sdim#define _mm512_cvtt_roundpd_epi32(A, R) \ 3831309124Sdim (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 3832309124Sdim (__v8si)_mm256_setzero_si256(), \ 3833341825Sdim (__mmask8)-1, (int)(R)) 3834309124Sdim 3835341825Sdim#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \ 3836309124Sdim (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 3837309124Sdim (__v8si)(__m256i)(W), \ 3838341825Sdim (__mmask8)(U), (int)(R)) 3839309124Sdim 3840341825Sdim#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \ 3841309124Sdim (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 
3842309124Sdim (__v8si)_mm256_setzero_si256(), \ 3843341825Sdim (__mmask8)(U), (int)(R)) 3844309124Sdim 3845341825Sdimstatic __inline __m256i __DEFAULT_FN_ATTRS512 3846296417Sdim_mm512_cvttpd_epi32(__m512d __a) 3847277325Sdim{ 3848296417Sdim return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a, 3849277325Sdim (__v8si)_mm256_setzero_si256(), 3850277325Sdim (__mmask8) -1, 3851277325Sdim _MM_FROUND_CUR_DIRECTION); 3852277325Sdim} 3853277325Sdim 3854341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 3855309124Sdim_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) 3856309124Sdim{ 3857309124Sdim return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 3858309124Sdim (__v8si) __W, 3859309124Sdim (__mmask8) __U, 3860309124Sdim _MM_FROUND_CUR_DIRECTION); 3861309124Sdim} 3862277325Sdim 3863341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 3864309124Sdim_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) 3865309124Sdim{ 3866309124Sdim return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 3867309124Sdim (__v8si) _mm256_setzero_si256 (), 3868309124Sdim (__mmask8) __U, 3869309124Sdim _MM_FROUND_CUR_DIRECTION); 3870309124Sdim} 3871309124Sdim 3872341825Sdim#define _mm512_cvtt_roundps_epi32(A, R) \ 3873309124Sdim (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 3874288943Sdim (__v16si)_mm512_setzero_si512(), \ 3875341825Sdim (__mmask16)-1, (int)(R)) 3876277325Sdim 3877341825Sdim#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \ 3878309124Sdim (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 3879309124Sdim (__v16si)(__m512i)(W), \ 3880341825Sdim (__mmask16)(U), (int)(R)) 3881309124Sdim 3882341825Sdim#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \ 3883309124Sdim (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 3884309124Sdim (__v16si)_mm512_setzero_si512(), \ 3885341825Sdim (__mmask16)(U), (int)(R)) 3886309124Sdim 3887341825Sdimstatic __inline __m512i 
__DEFAULT_FN_ATTRS512 3888309124Sdim_mm512_cvttps_epi32(__m512 __a) 3889309124Sdim{ 3890309124Sdim return (__m512i) 3891309124Sdim __builtin_ia32_cvttps2dq512_mask((__v16sf) __a, 3892309124Sdim (__v16si) _mm512_setzero_si512 (), 3893309124Sdim (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); 3894309124Sdim} 3895309124Sdim 3896341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 3897309124Sdim_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) 3898309124Sdim{ 3899309124Sdim return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 3900309124Sdim (__v16si) __W, 3901309124Sdim (__mmask16) __U, 3902309124Sdim _MM_FROUND_CUR_DIRECTION); 3903309124Sdim} 3904309124Sdim 3905341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 3906309124Sdim_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) 3907309124Sdim{ 3908309124Sdim return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 3909309124Sdim (__v16si) _mm512_setzero_si512 (), 3910309124Sdim (__mmask16) __U, 3911309124Sdim _MM_FROUND_CUR_DIRECTION); 3912309124Sdim} 3913309124Sdim 3914341825Sdim#define _mm512_cvt_roundps_epi32(A, R) \ 3915309124Sdim (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 3916288943Sdim (__v16si)_mm512_setzero_si512(), \ 3917341825Sdim (__mmask16)-1, (int)(R)) 3918288943Sdim 3919341825Sdim#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \ 3920309124Sdim (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 3921309124Sdim (__v16si)(__m512i)(W), \ 3922341825Sdim (__mmask16)(U), (int)(R)) 3923309124Sdim 3924341825Sdim#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \ 3925309124Sdim (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 3926309124Sdim (__v16si)_mm512_setzero_si512(), \ 3927341825Sdim (__mmask16)(U), (int)(R)) 3928309124Sdim 3929341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 3930309124Sdim_mm512_cvtps_epi32 (__m512 __A) 3931309124Sdim{ 3932309124Sdim return (__m512i) __builtin_ia32_cvtps2dq512_mask 
((__v16sf) __A, 3933309124Sdim (__v16si) _mm512_undefined_epi32 (), 3934309124Sdim (__mmask16) -1, 3935309124Sdim _MM_FROUND_CUR_DIRECTION); 3936309124Sdim} 3937309124Sdim 3938341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 3939309124Sdim_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) 3940309124Sdim{ 3941309124Sdim return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 3942309124Sdim (__v16si) __W, 3943309124Sdim (__mmask16) __U, 3944309124Sdim _MM_FROUND_CUR_DIRECTION); 3945309124Sdim} 3946309124Sdim 3947341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 3948309124Sdim_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) 3949309124Sdim{ 3950309124Sdim return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 3951309124Sdim (__v16si) 3952309124Sdim _mm512_setzero_si512 (), 3953309124Sdim (__mmask16) __U, 3954309124Sdim _MM_FROUND_CUR_DIRECTION); 3955309124Sdim} 3956309124Sdim 3957341825Sdim#define _mm512_cvt_roundpd_epi32(A, R) \ 3958309124Sdim (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 3959288943Sdim (__v8si)_mm256_setzero_si256(), \ 3960341825Sdim (__mmask8)-1, (int)(R)) 3961288943Sdim 3962341825Sdim#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \ 3963309124Sdim (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 3964309124Sdim (__v8si)(__m256i)(W), \ 3965341825Sdim (__mmask8)(U), (int)(R)) 3966309124Sdim 3967341825Sdim#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \ 3968309124Sdim (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 3969309124Sdim (__v8si)_mm256_setzero_si256(), \ 3970341825Sdim (__mmask8)(U), (int)(R)) 3971309124Sdim 3972341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 3973309124Sdim_mm512_cvtpd_epi32 (__m512d __A) 3974309124Sdim{ 3975309124Sdim return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 3976309124Sdim (__v8si) 3977309124Sdim _mm256_undefined_si256 (), 3978309124Sdim (__mmask8) -1, 3979309124Sdim 
_MM_FROUND_CUR_DIRECTION); 3980309124Sdim} 3981309124Sdim 3982341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 3983309124Sdim_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) 3984309124Sdim{ 3985309124Sdim return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 3986309124Sdim (__v8si) __W, 3987309124Sdim (__mmask8) __U, 3988309124Sdim _MM_FROUND_CUR_DIRECTION); 3989309124Sdim} 3990309124Sdim 3991341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 3992309124Sdim_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) 3993309124Sdim{ 3994309124Sdim return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 3995309124Sdim (__v8si) 3996309124Sdim _mm256_setzero_si256 (), 3997309124Sdim (__mmask8) __U, 3998309124Sdim _MM_FROUND_CUR_DIRECTION); 3999309124Sdim} 4000309124Sdim 4001341825Sdim#define _mm512_cvt_roundps_epu32(A, R) \ 4002309124Sdim (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4003288943Sdim (__v16si)_mm512_setzero_si512(), \ 4004341825Sdim (__mmask16)-1, (int)(R)) 4005288943Sdim 4006341825Sdim#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \ 4007309124Sdim (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4008309124Sdim (__v16si)(__m512i)(W), \ 4009341825Sdim (__mmask16)(U), (int)(R)) 4010309124Sdim 4011341825Sdim#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \ 4012309124Sdim (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4013309124Sdim (__v16si)_mm512_setzero_si512(), \ 4014341825Sdim (__mmask16)(U), (int)(R)) 4015309124Sdim 4016341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4017309124Sdim_mm512_cvtps_epu32 ( __m512 __A) 4018309124Sdim{ 4019309124Sdim return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\ 4020309124Sdim (__v16si)\ 4021341825Sdim _mm512_undefined_epi32 (), 4022309124Sdim (__mmask16) -1,\ 4023341825Sdim _MM_FROUND_CUR_DIRECTION); 4024309124Sdim} 4025309124Sdim 4026341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 
4027309124Sdim_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) 4028309124Sdim{ 4029309124Sdim return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 4030309124Sdim (__v16si) __W, 4031309124Sdim (__mmask16) __U, 4032309124Sdim _MM_FROUND_CUR_DIRECTION); 4033309124Sdim} 4034309124Sdim 4035341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4036309124Sdim_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A) 4037309124Sdim{ 4038309124Sdim return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 4039322320Sdim (__v16si) 4040309124Sdim _mm512_setzero_si512 (), 4041309124Sdim (__mmask16) __U , 4042309124Sdim _MM_FROUND_CUR_DIRECTION); 4043309124Sdim} 4044309124Sdim 4045341825Sdim#define _mm512_cvt_roundpd_epu32(A, R) \ 4046309124Sdim (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4047288943Sdim (__v8si)_mm256_setzero_si256(), \ 4048341825Sdim (__mmask8)-1, (int)(R)) 4049288943Sdim 4050341825Sdim#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \ 4051309124Sdim (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4052341825Sdim (__v8si)(__m256i)(W), \ 4053341825Sdim (__mmask8)(U), (int)(R)) 4054309124Sdim 4055341825Sdim#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \ 4056309124Sdim (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4057309124Sdim (__v8si)_mm256_setzero_si256(), \ 4058341825Sdim (__mmask8)(U), (int)(R)) 4059309124Sdim 4060341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 4061309124Sdim_mm512_cvtpd_epu32 (__m512d __A) 4062309124Sdim{ 4063309124Sdim return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4064309124Sdim (__v8si) 4065309124Sdim _mm256_undefined_si256 (), 4066309124Sdim (__mmask8) -1, 4067309124Sdim _MM_FROUND_CUR_DIRECTION); 4068309124Sdim} 4069309124Sdim 4070341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 4071309124Sdim_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 4072309124Sdim{ 4073309124Sdim return 
(__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4074309124Sdim (__v8si) __W, 4075309124Sdim (__mmask8) __U, 4076309124Sdim _MM_FROUND_CUR_DIRECTION); 4077309124Sdim} 4078309124Sdim 4079341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 4080309124Sdim_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A) 4081309124Sdim{ 4082309124Sdim return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4083309124Sdim (__v8si) 4084309124Sdim _mm256_setzero_si256 (), 4085309124Sdim (__mmask8) __U, 4086309124Sdim _MM_FROUND_CUR_DIRECTION); 4087309124Sdim} 4088309124Sdim 4089341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512 4090321369Sdim_mm512_cvtsd_f64(__m512d __a) 4091321369Sdim{ 4092321369Sdim return __a[0]; 4093321369Sdim} 4094321369Sdim 4095341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512 4096321369Sdim_mm512_cvtss_f32(__m512 __a) 4097321369Sdim{ 4098321369Sdim return __a[0]; 4099321369Sdim} 4100321369Sdim 4101277325Sdim/* Unpack and Interleave */ 4102309124Sdim 4103341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 4104277325Sdim_mm512_unpackhi_pd(__m512d __a, __m512d __b) 4105277325Sdim{ 4106309124Sdim return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 4107309124Sdim 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); 4108277325Sdim} 4109277325Sdim 4110341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 4111309124Sdim_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 4112309124Sdim{ 4113309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 4114309124Sdim (__v8df)_mm512_unpackhi_pd(__A, __B), 4115309124Sdim (__v8df)__W); 4116309124Sdim} 4117309124Sdim 4118341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 4119309124Sdim_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B) 4120309124Sdim{ 4121309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 4122309124Sdim (__v8df)_mm512_unpackhi_pd(__A, __B), 4123309124Sdim (__v8df)_mm512_setzero_pd()); 
4124309124Sdim} 4125309124Sdim 4126341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 4127277325Sdim_mm512_unpacklo_pd(__m512d __a, __m512d __b) 4128277325Sdim{ 4129309124Sdim return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 4130309124Sdim 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); 4131277325Sdim} 4132277325Sdim 4133341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 4134309124Sdim_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 4135309124Sdim{ 4136309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 4137309124Sdim (__v8df)_mm512_unpacklo_pd(__A, __B), 4138309124Sdim (__v8df)__W); 4139309124Sdim} 4140309124Sdim 4141341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 4142309124Sdim_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) 4143309124Sdim{ 4144309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 4145309124Sdim (__v8df)_mm512_unpacklo_pd(__A, __B), 4146309124Sdim (__v8df)_mm512_setzero_pd()); 4147309124Sdim} 4148309124Sdim 4149341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 4150277325Sdim_mm512_unpackhi_ps(__m512 __a, __m512 __b) 4151277325Sdim{ 4152309124Sdim return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, 4153309124Sdim 2, 18, 3, 19, 4154309124Sdim 2+4, 18+4, 3+4, 19+4, 4155309124Sdim 2+8, 18+8, 3+8, 19+8, 4156309124Sdim 2+12, 18+12, 3+12, 19+12); 4157277325Sdim} 4158277325Sdim 4159341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 4160309124Sdim_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 4161309124Sdim{ 4162309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, 4163309124Sdim (__v16sf)_mm512_unpackhi_ps(__A, __B), 4164309124Sdim (__v16sf)__W); 4165309124Sdim} 4166309124Sdim 4167341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 4168309124Sdim_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) 4169309124Sdim{ 4170309124Sdim return 
(__m512)__builtin_ia32_selectps_512((__mmask16) __U, 4171309124Sdim (__v16sf)_mm512_unpackhi_ps(__A, __B), 4172309124Sdim (__v16sf)_mm512_setzero_ps()); 4173309124Sdim} 4174309124Sdim 4175341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 4176277325Sdim_mm512_unpacklo_ps(__m512 __a, __m512 __b) 4177277325Sdim{ 4178309124Sdim return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, 4179309124Sdim 0, 16, 1, 17, 4180309124Sdim 0+4, 16+4, 1+4, 17+4, 4181309124Sdim 0+8, 16+8, 1+8, 17+8, 4182309124Sdim 0+12, 16+12, 1+12, 17+12); 4183277325Sdim} 4184277325Sdim 4185341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 4186309124Sdim_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 4187309124Sdim{ 4188309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, 4189309124Sdim (__v16sf)_mm512_unpacklo_ps(__A, __B), 4190309124Sdim (__v16sf)__W); 4191309124Sdim} 4192309124Sdim 4193341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 4194309124Sdim_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B) 4195309124Sdim{ 4196309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, 4197309124Sdim (__v16sf)_mm512_unpacklo_ps(__A, __B), 4198309124Sdim (__v16sf)_mm512_setzero_ps()); 4199309124Sdim} 4200309124Sdim 4201341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4202309124Sdim_mm512_unpackhi_epi32(__m512i __A, __m512i __B) 4203309124Sdim{ 4204309124Sdim return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, 4205309124Sdim 2, 18, 3, 19, 4206309124Sdim 2+4, 18+4, 3+4, 19+4, 4207309124Sdim 2+8, 18+8, 3+8, 19+8, 4208309124Sdim 2+12, 18+12, 3+12, 19+12); 4209309124Sdim} 4210309124Sdim 4211341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4212309124Sdim_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 4213309124Sdim{ 4214309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, 4215309124Sdim (__v16si)_mm512_unpackhi_epi32(__A, __B), 
4216309124Sdim (__v16si)__W); 4217309124Sdim} 4218309124Sdim 4219341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4220309124Sdim_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B) 4221309124Sdim{ 4222309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, 4223309124Sdim (__v16si)_mm512_unpackhi_epi32(__A, __B), 4224309124Sdim (__v16si)_mm512_setzero_si512()); 4225309124Sdim} 4226309124Sdim 4227341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4228309124Sdim_mm512_unpacklo_epi32(__m512i __A, __m512i __B) 4229309124Sdim{ 4230309124Sdim return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, 4231309124Sdim 0, 16, 1, 17, 4232309124Sdim 0+4, 16+4, 1+4, 17+4, 4233309124Sdim 0+8, 16+8, 1+8, 17+8, 4234309124Sdim 0+12, 16+12, 1+12, 17+12); 4235309124Sdim} 4236309124Sdim 4237341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4238309124Sdim_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 4239309124Sdim{ 4240309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, 4241309124Sdim (__v16si)_mm512_unpacklo_epi32(__A, __B), 4242309124Sdim (__v16si)__W); 4243309124Sdim} 4244309124Sdim 4245341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4246309124Sdim_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B) 4247309124Sdim{ 4248309124Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, 4249309124Sdim (__v16si)_mm512_unpacklo_epi32(__A, __B), 4250309124Sdim (__v16si)_mm512_setzero_si512()); 4251309124Sdim} 4252309124Sdim 4253341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4254309124Sdim_mm512_unpackhi_epi64(__m512i __A, __m512i __B) 4255309124Sdim{ 4256309124Sdim return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, 4257309124Sdim 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); 4258309124Sdim} 4259309124Sdim 4260341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4261309124Sdim_mm512_mask_unpackhi_epi64(__m512i __W, 
__mmask8 __U, __m512i __A, __m512i __B) 4262309124Sdim{ 4263309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, 4264309124Sdim (__v8di)_mm512_unpackhi_epi64(__A, __B), 4265309124Sdim (__v8di)__W); 4266309124Sdim} 4267309124Sdim 4268341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4269309124Sdim_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B) 4270309124Sdim{ 4271309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, 4272309124Sdim (__v8di)_mm512_unpackhi_epi64(__A, __B), 4273309124Sdim (__v8di)_mm512_setzero_si512()); 4274309124Sdim} 4275309124Sdim 4276341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4277309124Sdim_mm512_unpacklo_epi64 (__m512i __A, __m512i __B) 4278309124Sdim{ 4279309124Sdim return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, 4280309124Sdim 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); 4281309124Sdim} 4282309124Sdim 4283341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4284309124Sdim_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 4285309124Sdim{ 4286309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, 4287309124Sdim (__v8di)_mm512_unpacklo_epi64(__A, __B), 4288309124Sdim (__v8di)__W); 4289309124Sdim} 4290309124Sdim 4291341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4292309124Sdim_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 4293309124Sdim{ 4294309124Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, 4295309124Sdim (__v8di)_mm512_unpacklo_epi64(__A, __B), 4296309124Sdim (__v8di)_mm512_setzero_si512()); 4297309124Sdim} 4298309124Sdim 4299277325Sdim 4300277325Sdim/* SIMD load ops */ 4301277325Sdim 4302341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 4303309124Sdim_mm512_loadu_si512 (void const *__P) 4304309124Sdim{ 4305341825Sdim struct __loadu_si512 { 4306353358Sdim __m512i_u __v; 4307341825Sdim } __attribute__((__packed__, __may_alias__)); 4308360784Sdim return 
((const struct __loadu_si512*)__P)->__v; 4309309124Sdim} 4310309124Sdim 4311341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 4312344779Sdim_mm512_loadu_epi32 (void const *__P) 4313344779Sdim{ 4314344779Sdim struct __loadu_epi32 { 4315353358Sdim __m512i_u __v; 4316344779Sdim } __attribute__((__packed__, __may_alias__)); 4317360784Sdim return ((const struct __loadu_epi32*)__P)->__v; 4318344779Sdim} 4319344779Sdim 4320344779Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 4321309124Sdim_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) 4322309124Sdim{ 4323309124Sdim return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, 4324309124Sdim (__v16si) __W, 4325309124Sdim (__mmask16) __U); 4326309124Sdim} 4327309124Sdim 4328309124Sdim 4329341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 4330277325Sdim_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) 4331277325Sdim{ 4332309124Sdim return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P, 4333277325Sdim (__v16si) 4334277325Sdim _mm512_setzero_si512 (), 4335277325Sdim (__mmask16) __U); 4336277325Sdim} 4337277325Sdim 4338341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 4339344779Sdim_mm512_loadu_epi64 (void const *__P) 4340344779Sdim{ 4341344779Sdim struct __loadu_epi64 { 4342353358Sdim __m512i_u __v; 4343344779Sdim } __attribute__((__packed__, __may_alias__)); 4344360784Sdim return ((const struct __loadu_epi64*)__P)->__v; 4345344779Sdim} 4346344779Sdim 4347344779Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 4348309124Sdim_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) 4349309124Sdim{ 4350309124Sdim return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P, 4351309124Sdim (__v8di) __W, 4352309124Sdim (__mmask8) __U); 4353309124Sdim} 4354309124Sdim 4355341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 4356277325Sdim_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) 4357277325Sdim{ 4358309124Sdim return 
(__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P, 4359277325Sdim (__v8di) 4360277325Sdim _mm512_setzero_si512 (), 4361277325Sdim (__mmask8) __U); 4362277325Sdim} 4363277325Sdim 4364341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 4365309124Sdim_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P) 4366309124Sdim{ 4367309124Sdim return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P, 4368309124Sdim (__v16sf) __W, 4369309124Sdim (__mmask16) __U); 4370309124Sdim} 4371309124Sdim 4372341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 4373277325Sdim_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) 4374277325Sdim{ 4375309124Sdim return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P, 4376277325Sdim (__v16sf) 4377277325Sdim _mm512_setzero_ps (), 4378277325Sdim (__mmask16) __U); 4379277325Sdim} 4380277325Sdim 4381341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 4382309124Sdim_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P) 4383277325Sdim{ 4384309124Sdim return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P, 4385309124Sdim (__v8df) __W, 4386309124Sdim (__mmask8) __U); 4387277325Sdim} 4388277325Sdim 4389341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 4390309124Sdim_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) 4391288943Sdim{ 4392309124Sdim return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P, 4393288943Sdim (__v8df) 4394288943Sdim _mm512_setzero_pd (), 4395288943Sdim (__mmask8) __U); 4396288943Sdim} 4397288943Sdim 4398341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 4399321369Sdim_mm512_loadu_pd(void const *__p) 4400277325Sdim{ 4401277325Sdim struct __loadu_pd { 4402353358Sdim __m512d_u __v; 4403288943Sdim } __attribute__((__packed__, __may_alias__)); 4404360784Sdim return ((const struct __loadu_pd*)__p)->__v; 4405277325Sdim} 4406277325Sdim 4407341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 4408321369Sdim_mm512_loadu_ps(void 
const *__p) 4409277325Sdim{ 4410277325Sdim struct __loadu_ps { 4411353358Sdim __m512_u __v; 4412288943Sdim } __attribute__((__packed__, __may_alias__)); 4413360784Sdim return ((const struct __loadu_ps*)__p)->__v; 4414277325Sdim} 4415277325Sdim 4416341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 4417321369Sdim_mm512_load_ps(void const *__p) 4418288943Sdim{ 4419360784Sdim return *(const __m512*)__p; 4420288943Sdim} 4421288943Sdim 4422341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 4423309124Sdim_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P) 4424309124Sdim{ 4425309124Sdim return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, 4426309124Sdim (__v16sf) __W, 4427309124Sdim (__mmask16) __U); 4428309124Sdim} 4429309124Sdim 4430341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 4431309124Sdim_mm512_maskz_load_ps(__mmask16 __U, void const *__P) 4432309124Sdim{ 4433309124Sdim return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P, 4434309124Sdim (__v16sf) 4435309124Sdim _mm512_setzero_ps (), 4436309124Sdim (__mmask16) __U); 4437309124Sdim} 4438309124Sdim 4439341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 4440321369Sdim_mm512_load_pd(void const *__p) 4441288943Sdim{ 4442360784Sdim return *(const __m512d*)__p; 4443288943Sdim} 4444288943Sdim 4445341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 4446309124Sdim_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P) 4447309124Sdim{ 4448309124Sdim return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, 4449309124Sdim (__v8df) __W, 4450309124Sdim (__mmask8) __U); 4451309124Sdim} 4452309124Sdim 4453341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 4454309124Sdim_mm512_maskz_load_pd(__mmask8 __U, void const *__P) 4455309124Sdim{ 4456309124Sdim return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P, 4457309124Sdim (__v8df) 4458309124Sdim _mm512_setzero_pd (), 4459309124Sdim (__mmask8) __U); 4460309124Sdim} 
4461309124Sdim 4462341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 4463309124Sdim_mm512_load_si512 (void const *__P) 4464309124Sdim{ 4465360784Sdim return *(const __m512i *) __P; 4466309124Sdim} 4467309124Sdim 4468341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 4469309124Sdim_mm512_load_epi32 (void const *__P) 4470309124Sdim{ 4471360784Sdim return *(const __m512i *) __P; 4472309124Sdim} 4473309124Sdim 4474341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 4475309124Sdim_mm512_load_epi64 (void const *__P) 4476309124Sdim{ 4477360784Sdim return *(const __m512i *) __P; 4478309124Sdim} 4479309124Sdim 4480277325Sdim/* SIMD store ops */ 4481277325Sdim 4482341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512 4483344779Sdim_mm512_storeu_epi64 (void *__P, __m512i __A) 4484344779Sdim{ 4485344779Sdim struct __storeu_epi64 { 4486353358Sdim __m512i_u __v; 4487344779Sdim } __attribute__((__packed__, __may_alias__)); 4488344779Sdim ((struct __storeu_epi64*)__P)->__v = __A; 4489344779Sdim} 4490344779Sdim 4491344779Sdimstatic __inline void __DEFAULT_FN_ATTRS512 4492277325Sdim_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) 4493277325Sdim{ 4494309124Sdim __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A, 4495277325Sdim (__mmask8) __U); 4496277325Sdim} 4497277325Sdim 4498341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512 4499309124Sdim_mm512_storeu_si512 (void *__P, __m512i __A) 4500309124Sdim{ 4501341825Sdim struct __storeu_si512 { 4502353358Sdim __m512i_u __v; 4503341825Sdim } __attribute__((__packed__, __may_alias__)); 4504341825Sdim ((struct __storeu_si512*)__P)->__v = __A; 4505309124Sdim} 4506309124Sdim 4507341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512 4508344779Sdim_mm512_storeu_epi32 (void *__P, __m512i __A) 4509344779Sdim{ 4510344779Sdim struct __storeu_epi32 { 4511353358Sdim __m512i_u __v; 4512344779Sdim } __attribute__((__packed__, __may_alias__)); 4513344779Sdim ((struct __storeu_epi32*)__P)->__v = __A; 
4514344779Sdim} 4515344779Sdim 4516344779Sdimstatic __inline void __DEFAULT_FN_ATTRS512 4517277325Sdim_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) 4518277325Sdim{ 4519309124Sdim __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A, 4520277325Sdim (__mmask16) __U); 4521277325Sdim} 4522277325Sdim 4523341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512 4524277325Sdim_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A) 4525277325Sdim{ 4526309124Sdim __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U); 4527277325Sdim} 4528277325Sdim 4529341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512 4530277325Sdim_mm512_storeu_pd(void *__P, __m512d __A) 4531277325Sdim{ 4532341825Sdim struct __storeu_pd { 4533353358Sdim __m512d_u __v; 4534341825Sdim } __attribute__((__packed__, __may_alias__)); 4535341825Sdim ((struct __storeu_pd*)__P)->__v = __A; 4536277325Sdim} 4537277325Sdim 4538341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512 4539277325Sdim_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A) 4540277325Sdim{ 4541309124Sdim __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A, 4542277325Sdim (__mmask16) __U); 4543277325Sdim} 4544277325Sdim 4545341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512 4546277325Sdim_mm512_storeu_ps(void *__P, __m512 __A) 4547277325Sdim{ 4548341825Sdim struct __storeu_ps { 4549353358Sdim __m512_u __v; 4550341825Sdim } __attribute__((__packed__, __may_alias__)); 4551341825Sdim ((struct __storeu_ps*)__P)->__v = __A; 4552277325Sdim} 4553277325Sdim 4554341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512 4555288943Sdim_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) 4556277325Sdim{ 4557288943Sdim __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); 4558277325Sdim} 4559277325Sdim 4560341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512 4561277325Sdim_mm512_store_pd(void *__P, __m512d __A) 4562277325Sdim{ 4563277325Sdim *(__m512d*)__P = __A; 
4564277325Sdim} 4565277325Sdim 4566341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512 4567288943Sdim_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A) 4568288943Sdim{ 4569288943Sdim __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A, 4570288943Sdim (__mmask16) __U); 4571288943Sdim} 4572288943Sdim 4573341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512 4574288943Sdim_mm512_store_ps(void *__P, __m512 __A) 4575288943Sdim{ 4576288943Sdim *(__m512*)__P = __A; 4577288943Sdim} 4578288943Sdim 4579341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512 4580309124Sdim_mm512_store_si512 (void *__P, __m512i __A) 4581309124Sdim{ 4582309124Sdim *(__m512i *) __P = __A; 4583309124Sdim} 4584309124Sdim 4585341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512 4586309124Sdim_mm512_store_epi32 (void *__P, __m512i __A) 4587309124Sdim{ 4588309124Sdim *(__m512i *) __P = __A; 4589309124Sdim} 4590309124Sdim 4591341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512 4592309124Sdim_mm512_store_epi64 (void *__P, __m512i __A) 4593309124Sdim{ 4594309124Sdim *(__m512i *) __P = __A; 4595309124Sdim} 4596309124Sdim 4597277325Sdim/* Mask ops */ 4598277325Sdim 4599344779Sdimstatic __inline __mmask16 __DEFAULT_FN_ATTRS 4600277325Sdim_mm512_knot(__mmask16 __M) 4601277325Sdim{ 4602277325Sdim return __builtin_ia32_knothi(__M); 4603277325Sdim} 4604277325Sdim 4605277325Sdim/* Integer compare */ 4606277325Sdim 4607327952Sdim#define _mm512_cmpeq_epi32_mask(A, B) \ 4608327952Sdim _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) 4609327952Sdim#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \ 4610327952Sdim _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) 4611327952Sdim#define _mm512_cmpge_epi32_mask(A, B) \ 4612327952Sdim _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) 4613327952Sdim#define _mm512_mask_cmpge_epi32_mask(k, A, B) \ 4614327952Sdim _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) 4615327952Sdim#define _mm512_cmpgt_epi32_mask(A, B) \ 4616327952Sdim 
_mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
/* Predicate-specific comparison wrappers.
 *
 * Each macro forwards to the generic _mm512_[mask_]cmp_<type>_mask macro with
 * a fixed _MM_CMPINT_* predicate (EQ, GE, GT, LE, LT or NE).  The `mask_`
 * variants additionally forward the caller's mask `k` as the first argument.
 *
 * Every replacement list is wrapped in its own parentheses so the expansion
 * is a single primary expression at the use site (safe next to postfix
 * operators, unary operators and sizeof), independently of how the generic
 * macro it forwards to is spelled.
 */
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    (_mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT))
#define _mm512_cmple_epi32_mask(A, B) \
    (_mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE))
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    (_mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE))
#define _mm512_cmplt_epi32_mask(A, B) \
    (_mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT))
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    (_mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT))
#define _mm512_cmpneq_epi32_mask(A, B) \
    (_mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE))
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    (_mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE))

/* epu32 wrappers: forward to _mm512_[mask_]cmp_epu32_mask. */
#define _mm512_cmpeq_epu32_mask(A, B) \
    (_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ))
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    (_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ))
#define _mm512_cmpge_epu32_mask(A, B) \
    (_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE))
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    (_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE))
#define _mm512_cmpgt_epu32_mask(A, B) \
    (_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT))
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    (_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT))
#define _mm512_cmple_epu32_mask(A, B) \
    (_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE))
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    (_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE))
#define _mm512_cmplt_epu32_mask(A, B) \
    (_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT))
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    (_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT))
#define _mm512_cmpneq_epu32_mask(A, B) \
    (_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE))
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    (_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE))

/* epi64 wrappers: forward to _mm512_[mask_]cmp_epi64_mask. */
#define _mm512_cmpeq_epi64_mask(A, B) \
    (_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ))
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    (_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ))
#define _mm512_cmpge_epi64_mask(A, B) \
    (_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE))
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    (_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE))
#define _mm512_cmpgt_epi64_mask(A, B) \
    (_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT))
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    (_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT))
#define _mm512_cmple_epi64_mask(A, B) \
    (_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE))
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    (_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE))
#define _mm512_cmplt_epi64_mask(A, B) \
    (_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT))
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    (_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT))
#define _mm512_cmpneq_epi64_mask(A, B) \
    (_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE))
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    (_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE))

4682327952Sdim#define _mm512_cmpeq_epu64_mask(A, B) \ 4683327952Sdim _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ) 4684327952Sdim#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \ 4685327952Sdim _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ) 4686327952Sdim#define _mm512_cmpge_epu64_mask(A, B) \ 4687327952Sdim _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE) 4688327952Sdim#define _mm512_mask_cmpge_epu64_mask(k, A, B) \ 4689327952Sdim _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE) 4690327952Sdim#define _mm512_cmpgt_epu64_mask(A, B) \ 4691327952Sdim _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT) 4692327952Sdim#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \ 4693327952Sdim _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT) 4694327952Sdim#define _mm512_cmple_epu64_mask(A, B) \ 4695327952Sdim _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE) 4696327952Sdim#define _mm512_mask_cmple_epu64_mask(k, A, B) \ 4697327952Sdim _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE) 4698327952Sdim#define _mm512_cmplt_epu64_mask(A, B) \ 4699327952Sdim _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT) 4700327952Sdim#define _mm512_mask_cmplt_epu64_mask(k, A, B) \ 4701327952Sdim _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT) 4702327952Sdim#define _mm512_cmpneq_epu64_mask(A, B) \ 4703327952Sdim _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE) 4704327952Sdim#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \ 4705327952Sdim _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) 4706288943Sdim 4707341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4708314564Sdim_mm512_cvtepi8_epi32(__m128i __A) 4709309124Sdim{ 4710314564Sdim /* This function always performs a signed extension, but __v16qi is a char 4711314564Sdim which may be signed or unsigned, so use __v16qs. 
*/ 4712314564Sdim return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si); 4713309124Sdim} 4714309124Sdim 4715341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4716314564Sdim_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) 4717309124Sdim{ 4718314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 4719314564Sdim (__v16si)_mm512_cvtepi8_epi32(__A), 4720314564Sdim (__v16si)__W); 4721309124Sdim} 4722309124Sdim 4723341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4724314564Sdim_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A) 4725309124Sdim{ 4726314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 4727314564Sdim (__v16si)_mm512_cvtepi8_epi32(__A), 4728314564Sdim (__v16si)_mm512_setzero_si512()); 4729309124Sdim} 4730309124Sdim 4731341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4732314564Sdim_mm512_cvtepi8_epi64(__m128i __A) 4733309124Sdim{ 4734314564Sdim /* This function always performs a signed extension, but __v16qi is a char 4735314564Sdim which may be signed or unsigned, so use __v16qs. 
*/ 4736314564Sdim return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); 4737309124Sdim} 4738309124Sdim 4739341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4740314564Sdim_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) 4741309124Sdim{ 4742314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 4743314564Sdim (__v8di)_mm512_cvtepi8_epi64(__A), 4744314564Sdim (__v8di)__W); 4745309124Sdim} 4746309124Sdim 4747341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4748314564Sdim_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 4749309124Sdim{ 4750314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 4751314564Sdim (__v8di)_mm512_cvtepi8_epi64(__A), 4752314564Sdim (__v8di)_mm512_setzero_si512 ()); 4753309124Sdim} 4754309124Sdim 4755341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4756314564Sdim_mm512_cvtepi32_epi64(__m256i __X) 4757309124Sdim{ 4758314564Sdim return (__m512i)__builtin_convertvector((__v8si)__X, __v8di); 4759309124Sdim} 4760309124Sdim 4761341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4762314564Sdim_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) 4763309124Sdim{ 4764314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 4765314564Sdim (__v8di)_mm512_cvtepi32_epi64(__X), 4766314564Sdim (__v8di)__W); 4767309124Sdim} 4768309124Sdim 4769341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4770314564Sdim_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X) 4771309124Sdim{ 4772314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 4773314564Sdim (__v8di)_mm512_cvtepi32_epi64(__X), 4774314564Sdim (__v8di)_mm512_setzero_si512()); 4775309124Sdim} 4776309124Sdim 4777341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4778314564Sdim_mm512_cvtepi16_epi32(__m256i __A) 4779309124Sdim{ 4780314564Sdim return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si); 
4781309124Sdim} 4782309124Sdim 4783341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4784314564Sdim_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) 4785309124Sdim{ 4786314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 4787314564Sdim (__v16si)_mm512_cvtepi16_epi32(__A), 4788314564Sdim (__v16si)__W); 4789309124Sdim} 4790309124Sdim 4791341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4792314564Sdim_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A) 4793309124Sdim{ 4794314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 4795314564Sdim (__v16si)_mm512_cvtepi16_epi32(__A), 4796314564Sdim (__v16si)_mm512_setzero_si512 ()); 4797309124Sdim} 4798309124Sdim 4799341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4800314564Sdim_mm512_cvtepi16_epi64(__m128i __A) 4801309124Sdim{ 4802314564Sdim return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di); 4803309124Sdim} 4804309124Sdim 4805341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4806314564Sdim_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) 4807309124Sdim{ 4808314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 4809314564Sdim (__v8di)_mm512_cvtepi16_epi64(__A), 4810314564Sdim (__v8di)__W); 4811309124Sdim} 4812309124Sdim 4813341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4814314564Sdim_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 4815309124Sdim{ 4816314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 4817314564Sdim (__v8di)_mm512_cvtepi16_epi64(__A), 4818314564Sdim (__v8di)_mm512_setzero_si512()); 4819309124Sdim} 4820309124Sdim 4821341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4822314564Sdim_mm512_cvtepu8_epi32(__m128i __A) 4823309124Sdim{ 4824314564Sdim return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si); 4825309124Sdim} 4826309124Sdim 4827341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 
4828314564Sdim_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) 4829309124Sdim{ 4830314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 4831314564Sdim (__v16si)_mm512_cvtepu8_epi32(__A), 4832314564Sdim (__v16si)__W); 4833309124Sdim} 4834309124Sdim 4835341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4836314564Sdim_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A) 4837309124Sdim{ 4838314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 4839314564Sdim (__v16si)_mm512_cvtepu8_epi32(__A), 4840314564Sdim (__v16si)_mm512_setzero_si512()); 4841309124Sdim} 4842309124Sdim 4843341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4844314564Sdim_mm512_cvtepu8_epi64(__m128i __A) 4845309124Sdim{ 4846314564Sdim return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); 4847309124Sdim} 4848309124Sdim 4849341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4850314564Sdim_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) 4851309124Sdim{ 4852314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 4853314564Sdim (__v8di)_mm512_cvtepu8_epi64(__A), 4854314564Sdim (__v8di)__W); 4855309124Sdim} 4856309124Sdim 4857341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4858314564Sdim_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) 4859309124Sdim{ 4860314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 4861314564Sdim (__v8di)_mm512_cvtepu8_epi64(__A), 4862314564Sdim (__v8di)_mm512_setzero_si512()); 4863309124Sdim} 4864309124Sdim 4865341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4866314564Sdim_mm512_cvtepu32_epi64(__m256i __X) 4867309124Sdim{ 4868314564Sdim return (__m512i)__builtin_convertvector((__v8su)__X, __v8di); 4869309124Sdim} 4870309124Sdim 4871341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4872314564Sdim_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) 
4873309124Sdim{ 4874314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 4875314564Sdim (__v8di)_mm512_cvtepu32_epi64(__X), 4876314564Sdim (__v8di)__W); 4877309124Sdim} 4878309124Sdim 4879341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4880314564Sdim_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X) 4881309124Sdim{ 4882314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 4883314564Sdim (__v8di)_mm512_cvtepu32_epi64(__X), 4884314564Sdim (__v8di)_mm512_setzero_si512()); 4885309124Sdim} 4886309124Sdim 4887341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4888314564Sdim_mm512_cvtepu16_epi32(__m256i __A) 4889309124Sdim{ 4890314564Sdim return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si); 4891309124Sdim} 4892309124Sdim 4893341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4894314564Sdim_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) 4895309124Sdim{ 4896314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 4897314564Sdim (__v16si)_mm512_cvtepu16_epi32(__A), 4898314564Sdim (__v16si)__W); 4899309124Sdim} 4900309124Sdim 4901341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4902314564Sdim_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A) 4903309124Sdim{ 4904314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 4905314564Sdim (__v16si)_mm512_cvtepu16_epi32(__A), 4906314564Sdim (__v16si)_mm512_setzero_si512()); 4907309124Sdim} 4908309124Sdim 4909341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4910314564Sdim_mm512_cvtepu16_epi64(__m128i __A) 4911309124Sdim{ 4912314564Sdim return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di); 4913309124Sdim} 4914309124Sdim 4915341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4916314564Sdim_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) 4917309124Sdim{ 4918314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 4919314564Sdim 
(__v8di)_mm512_cvtepu16_epi64(__A), 4920314564Sdim (__v8di)__W); 4921309124Sdim} 4922309124Sdim 4923341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4924314564Sdim_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 4925309124Sdim{ 4926314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 4927314564Sdim (__v8di)_mm512_cvtepu16_epi64(__A), 4928314564Sdim (__v8di)_mm512_setzero_si512()); 4929309124Sdim} 4930309124Sdim 4931341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4932309124Sdim_mm512_rorv_epi32 (__m512i __A, __m512i __B) 4933309124Sdim{ 4934341825Sdim return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B); 4935309124Sdim} 4936309124Sdim 4937341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4938309124Sdim_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 4939309124Sdim{ 4940341825Sdim return (__m512i)__builtin_ia32_selectd_512(__U, 4941341825Sdim (__v16si)_mm512_rorv_epi32(__A, __B), 4942341825Sdim (__v16si)__W); 4943309124Sdim} 4944309124Sdim 4945341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4946309124Sdim_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 4947309124Sdim{ 4948341825Sdim return (__m512i)__builtin_ia32_selectd_512(__U, 4949341825Sdim (__v16si)_mm512_rorv_epi32(__A, __B), 4950341825Sdim (__v16si)_mm512_setzero_si512()); 4951309124Sdim} 4952309124Sdim 4953341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4954309124Sdim_mm512_rorv_epi64 (__m512i __A, __m512i __B) 4955309124Sdim{ 4956341825Sdim return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B); 4957309124Sdim} 4958309124Sdim 4959341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4960309124Sdim_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 4961309124Sdim{ 4962341825Sdim return (__m512i)__builtin_ia32_selectq_512(__U, 4963341825Sdim (__v8di)_mm512_rorv_epi64(__A, __B), 4964341825Sdim (__v8di)__W); 4965309124Sdim} 4966309124Sdim 
4967341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 4968309124Sdim_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 4969309124Sdim{ 4970341825Sdim return (__m512i)__builtin_ia32_selectq_512(__U, 4971341825Sdim (__v8di)_mm512_rorv_epi64(__A, __B), 4972341825Sdim (__v8di)_mm512_setzero_si512()); 4973309124Sdim} 4974309124Sdim 4975309124Sdim 4976309124Sdim 4977341825Sdim#define _mm512_cmp_epi32_mask(a, b, p) \ 4978296417Sdim (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 4979309124Sdim (__v16si)(__m512i)(b), (int)(p), \ 4980341825Sdim (__mmask16)-1) 4981288943Sdim 4982341825Sdim#define _mm512_cmp_epu32_mask(a, b, p) \ 4983296417Sdim (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 4984309124Sdim (__v16si)(__m512i)(b), (int)(p), \ 4985341825Sdim (__mmask16)-1) 4986288943Sdim 4987341825Sdim#define _mm512_cmp_epi64_mask(a, b, p) \ 4988296417Sdim (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 4989309124Sdim (__v8di)(__m512i)(b), (int)(p), \ 4990341825Sdim (__mmask8)-1) 4991288943Sdim 4992341825Sdim#define _mm512_cmp_epu64_mask(a, b, p) \ 4993296417Sdim (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 4994309124Sdim (__v8di)(__m512i)(b), (int)(p), \ 4995341825Sdim (__mmask8)-1) 4996288943Sdim 4997341825Sdim#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \ 4998296417Sdim (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 4999309124Sdim (__v16si)(__m512i)(b), (int)(p), \ 5000341825Sdim (__mmask16)(m)) 5001288943Sdim 5002341825Sdim#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \ 5003296417Sdim (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 5004309124Sdim (__v16si)(__m512i)(b), (int)(p), \ 5005341825Sdim (__mmask16)(m)) 5006288943Sdim 5007341825Sdim#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \ 5008296417Sdim (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 5009309124Sdim (__v8di)(__m512i)(b), (int)(p), \ 5010341825Sdim (__mmask8)(m)) 
5011288943Sdim 5012341825Sdim#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \ 5013296417Sdim (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 5014309124Sdim (__v8di)(__m512i)(b), (int)(p), \ 5015341825Sdim (__mmask8)(m)) 5016288943Sdim 5017341825Sdim#define _mm512_rol_epi32(a, b) \ 5018341825Sdim (__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)) 5019309124Sdim 5020341825Sdim#define _mm512_mask_rol_epi32(W, U, a, b) \ 5021341825Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 5022341825Sdim (__v16si)_mm512_rol_epi32((a), (b)), \ 5023341825Sdim (__v16si)(__m512i)(W)) 5024309124Sdim 5025341825Sdim#define _mm512_maskz_rol_epi32(U, a, b) \ 5026341825Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 5027341825Sdim (__v16si)_mm512_rol_epi32((a), (b)), \ 5028341825Sdim (__v16si)_mm512_setzero_si512()) 5029309124Sdim 5030341825Sdim#define _mm512_rol_epi64(a, b) \ 5031341825Sdim (__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)) 5032309124Sdim 5033341825Sdim#define _mm512_mask_rol_epi64(W, U, a, b) \ 5034341825Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 5035341825Sdim (__v8di)_mm512_rol_epi64((a), (b)), \ 5036341825Sdim (__v8di)(__m512i)(W)) 5037309124Sdim 5038341825Sdim#define _mm512_maskz_rol_epi64(U, a, b) \ 5039341825Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 5040341825Sdim (__v8di)_mm512_rol_epi64((a), (b)), \ 5041341825Sdim (__v8di)_mm512_setzero_si512()) 5042341825Sdim 5043341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5044309124Sdim_mm512_rolv_epi32 (__m512i __A, __m512i __B) 5045309124Sdim{ 5046341825Sdim return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B); 5047309124Sdim} 5048309124Sdim 5049341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5050309124Sdim_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 5051309124Sdim{ 5052341825Sdim return (__m512i)__builtin_ia32_selectd_512(__U, 5053341825Sdim 
(__v16si)_mm512_rolv_epi32(__A, __B), 5054341825Sdim (__v16si)__W); 5055309124Sdim} 5056309124Sdim 5057341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5058309124Sdim_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 5059309124Sdim{ 5060341825Sdim return (__m512i)__builtin_ia32_selectd_512(__U, 5061341825Sdim (__v16si)_mm512_rolv_epi32(__A, __B), 5062341825Sdim (__v16si)_mm512_setzero_si512()); 5063309124Sdim} 5064309124Sdim 5065341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5066309124Sdim_mm512_rolv_epi64 (__m512i __A, __m512i __B) 5067309124Sdim{ 5068341825Sdim return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B); 5069309124Sdim} 5070309124Sdim 5071341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5072309124Sdim_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 5073309124Sdim{ 5074341825Sdim return (__m512i)__builtin_ia32_selectq_512(__U, 5075341825Sdim (__v8di)_mm512_rolv_epi64(__A, __B), 5076341825Sdim (__v8di)__W); 5077309124Sdim} 5078309124Sdim 5079341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5080309124Sdim_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 5081309124Sdim{ 5082341825Sdim return (__m512i)__builtin_ia32_selectq_512(__U, 5083341825Sdim (__v8di)_mm512_rolv_epi64(__A, __B), 5084341825Sdim (__v8di)_mm512_setzero_si512()); 5085309124Sdim} 5086309124Sdim 5087341825Sdim#define _mm512_ror_epi32(A, B) \ 5088341825Sdim (__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)) 5089309124Sdim 5090341825Sdim#define _mm512_mask_ror_epi32(W, U, A, B) \ 5091341825Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 5092341825Sdim (__v16si)_mm512_ror_epi32((A), (B)), \ 5093341825Sdim (__v16si)(__m512i)(W)) 5094309124Sdim 5095341825Sdim#define _mm512_maskz_ror_epi32(U, A, B) \ 5096341825Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 5097341825Sdim (__v16si)_mm512_ror_epi32((A), (B)), \ 5098341825Sdim 
(__v16si)_mm512_setzero_si512()) 5099309124Sdim 5100341825Sdim#define _mm512_ror_epi64(A, B) \ 5101341825Sdim (__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)) 5102309124Sdim 5103341825Sdim#define _mm512_mask_ror_epi64(W, U, A, B) \ 5104341825Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 5105341825Sdim (__v8di)_mm512_ror_epi64((A), (B)), \ 5106341825Sdim (__v8di)(__m512i)(W)) 5107309124Sdim 5108341825Sdim#define _mm512_maskz_ror_epi64(U, A, B) \ 5109341825Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 5110341825Sdim (__v8di)_mm512_ror_epi64((A), (B)), \ 5111341825Sdim (__v8di)_mm512_setzero_si512()) 5112309124Sdim 5113341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5114314564Sdim_mm512_slli_epi32(__m512i __A, int __B) 5115314564Sdim{ 5116314564Sdim return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B); 5117314564Sdim} 5118309124Sdim 5119341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5120314564Sdim_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) 5121314564Sdim{ 5122314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5123314564Sdim (__v16si)_mm512_slli_epi32(__A, __B), 5124314564Sdim (__v16si)__W); 5125314564Sdim} 5126309124Sdim 5127341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5128314564Sdim_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) { 5129314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5130314564Sdim (__v16si)_mm512_slli_epi32(__A, __B), 5131314564Sdim (__v16si)_mm512_setzero_si512()); 5132314564Sdim} 5133309124Sdim 5134341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5135314564Sdim_mm512_slli_epi64(__m512i __A, int __B) 5136314564Sdim{ 5137314564Sdim return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B); 5138314564Sdim} 5139309124Sdim 5140341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5141314564Sdim_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) 5142314564Sdim{ 
5143314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5144314564Sdim (__v8di)_mm512_slli_epi64(__A, __B), 5145314564Sdim (__v8di)__W); 5146314564Sdim} 5147309124Sdim 5148341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5149314564Sdim_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B) 5150314564Sdim{ 5151314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5152314564Sdim (__v8di)_mm512_slli_epi64(__A, __B), 5153314564Sdim (__v8di)_mm512_setzero_si512()); 5154314564Sdim} 5155309124Sdim 5156341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5157314564Sdim_mm512_srli_epi32(__m512i __A, int __B) 5158314564Sdim{ 5159314564Sdim return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B); 5160314564Sdim} 5161309124Sdim 5162341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5163314564Sdim_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) 5164314564Sdim{ 5165314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5166314564Sdim (__v16si)_mm512_srli_epi32(__A, __B), 5167314564Sdim (__v16si)__W); 5168314564Sdim} 5169309124Sdim 5170341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5171314564Sdim_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) { 5172314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5173314564Sdim (__v16si)_mm512_srli_epi32(__A, __B), 5174314564Sdim (__v16si)_mm512_setzero_si512()); 5175314564Sdim} 5176309124Sdim 5177341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5178314564Sdim_mm512_srli_epi64(__m512i __A, int __B) 5179314564Sdim{ 5180314564Sdim return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B); 5181314564Sdim} 5182309124Sdim 5183341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5184314564Sdim_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) 5185314564Sdim{ 5186314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5187314564Sdim (__v8di)_mm512_srli_epi64(__A, 
__B), 5188314564Sdim (__v8di)__W); 5189314564Sdim} 5190309124Sdim 5191341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5192314564Sdim_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B) 5193314564Sdim{ 5194314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5195314564Sdim (__v8di)_mm512_srli_epi64(__A, __B), 5196314564Sdim (__v8di)_mm512_setzero_si512()); 5197314564Sdim} 5198309124Sdim 5199341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5200309124Sdim_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) 5201309124Sdim{ 5202309124Sdim return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 5203309124Sdim (__v16si) __W, 5204309124Sdim (__mmask16) __U); 5205309124Sdim} 5206309124Sdim 5207341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5208309124Sdim_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P) 5209309124Sdim{ 5210309124Sdim return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 5211309124Sdim (__v16si) 5212309124Sdim _mm512_setzero_si512 (), 5213309124Sdim (__mmask16) __U); 5214309124Sdim} 5215309124Sdim 5216341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 5217309124Sdim_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A) 5218309124Sdim{ 5219309124Sdim __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A, 5220309124Sdim (__mmask16) __U); 5221309124Sdim} 5222309124Sdim 5223341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5224309124Sdim_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 5225309124Sdim{ 5226309124Sdim return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, 5227309124Sdim (__v16si) __A, 5228309124Sdim (__v16si) __W); 5229309124Sdim} 5230309124Sdim 5231341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5232309124Sdim_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) 5233309124Sdim{ 5234309124Sdim return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, 
5235309124Sdim (__v16si) __A, 5236309124Sdim (__v16si) _mm512_setzero_si512 ()); 5237309124Sdim} 5238309124Sdim 5239341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5240309124Sdim_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 5241309124Sdim{ 5242309124Sdim return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, 5243309124Sdim (__v8di) __A, 5244309124Sdim (__v8di) __W); 5245309124Sdim} 5246309124Sdim 5247341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5248309124Sdim_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) 5249309124Sdim{ 5250309124Sdim return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, 5251309124Sdim (__v8di) __A, 5252309124Sdim (__v8di) _mm512_setzero_si512 ()); 5253309124Sdim} 5254309124Sdim 5255341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5256309124Sdim_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P) 5257309124Sdim{ 5258309124Sdim return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, 5259309124Sdim (__v8di) __W, 5260309124Sdim (__mmask8) __U); 5261309124Sdim} 5262309124Sdim 5263341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5264309124Sdim_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P) 5265309124Sdim{ 5266309124Sdim return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, 5267309124Sdim (__v8di) 5268309124Sdim _mm512_setzero_si512 (), 5269309124Sdim (__mmask8) __U); 5270309124Sdim} 5271309124Sdim 5272341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 5273309124Sdim_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) 5274309124Sdim{ 5275309124Sdim __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A, 5276309124Sdim (__mmask8) __U); 5277309124Sdim} 5278309124Sdim 5279341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 5280309124Sdim_mm512_movedup_pd (__m512d __A) 5281309124Sdim{ 5282309124Sdim return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A, 5283309124Sdim 0, 0, 2, 
2, 4, 4, 6, 6); 5284309124Sdim} 5285309124Sdim 5286341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 5287309124Sdim_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A) 5288309124Sdim{ 5289309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 5290309124Sdim (__v8df)_mm512_movedup_pd(__A), 5291309124Sdim (__v8df)__W); 5292309124Sdim} 5293309124Sdim 5294341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 5295309124Sdim_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) 5296309124Sdim{ 5297309124Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 5298309124Sdim (__v8df)_mm512_movedup_pd(__A), 5299309124Sdim (__v8df)_mm512_setzero_pd()); 5300309124Sdim} 5301309124Sdim 5302341825Sdim#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \ 5303309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5304309124Sdim (__v8df)(__m512d)(B), \ 5305309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 5306341825Sdim (__mmask8)-1, (int)(R)) 5307309124Sdim 5308341825Sdim#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \ 5309309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5310309124Sdim (__v8df)(__m512d)(B), \ 5311309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 5312341825Sdim (__mmask8)(U), (int)(R)) 5313309124Sdim 5314341825Sdim#define _mm512_fixupimm_pd(A, B, C, imm) \ 5315309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5316309124Sdim (__v8df)(__m512d)(B), \ 5317309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 5318309124Sdim (__mmask8)-1, \ 5319341825Sdim _MM_FROUND_CUR_DIRECTION) 5320309124Sdim 5321341825Sdim#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \ 5322309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5323309124Sdim (__v8df)(__m512d)(B), \ 5324309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 5325309124Sdim (__mmask8)(U), \ 5326341825Sdim _MM_FROUND_CUR_DIRECTION) 5327309124Sdim 5328341825Sdim#define 
_mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \ 5329309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 5330309124Sdim (__v8df)(__m512d)(B), \ 5331309124Sdim (__v8di)(__m512i)(C), \ 5332309124Sdim (int)(imm), (__mmask8)(U), \ 5333341825Sdim (int)(R)) 5334309124Sdim 5335341825Sdim#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \ 5336309124Sdim (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 5337309124Sdim (__v8df)(__m512d)(B), \ 5338309124Sdim (__v8di)(__m512i)(C), \ 5339309124Sdim (int)(imm), (__mmask8)(U), \ 5340341825Sdim _MM_FROUND_CUR_DIRECTION) 5341309124Sdim 5342341825Sdim#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \ 5343309124Sdim (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5344309124Sdim (__v16sf)(__m512)(B), \ 5345309124Sdim (__v16si)(__m512i)(C), (int)(imm), \ 5346341825Sdim (__mmask16)-1, (int)(R)) 5347309124Sdim 5348341825Sdim#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \ 5349309124Sdim (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5350309124Sdim (__v16sf)(__m512)(B), \ 5351309124Sdim (__v16si)(__m512i)(C), (int)(imm), \ 5352341825Sdim (__mmask16)(U), (int)(R)) 5353309124Sdim 5354341825Sdim#define _mm512_fixupimm_ps(A, B, C, imm) \ 5355309124Sdim (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5356309124Sdim (__v16sf)(__m512)(B), \ 5357309124Sdim (__v16si)(__m512i)(C), (int)(imm), \ 5358309124Sdim (__mmask16)-1, \ 5359341825Sdim _MM_FROUND_CUR_DIRECTION) 5360309124Sdim 5361341825Sdim#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \ 5362309124Sdim (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5363309124Sdim (__v16sf)(__m512)(B), \ 5364309124Sdim (__v16si)(__m512i)(C), (int)(imm), \ 5365309124Sdim (__mmask16)(U), \ 5366341825Sdim _MM_FROUND_CUR_DIRECTION) 5367309124Sdim 5368341825Sdim#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \ 5369309124Sdim 
(__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ 5370309124Sdim (__v16sf)(__m512)(B), \ 5371309124Sdim (__v16si)(__m512i)(C), \ 5372309124Sdim (int)(imm), (__mmask16)(U), \ 5373341825Sdim (int)(R)) 5374309124Sdim 5375341825Sdim#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \ 5376309124Sdim (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ 5377309124Sdim (__v16sf)(__m512)(B), \ 5378309124Sdim (__v16si)(__m512i)(C), \ 5379309124Sdim (int)(imm), (__mmask16)(U), \ 5380341825Sdim _MM_FROUND_CUR_DIRECTION) 5381309124Sdim 5382341825Sdim#define _mm_fixupimm_round_sd(A, B, C, imm, R) \ 5383309124Sdim (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5384309124Sdim (__v2df)(__m128d)(B), \ 5385309124Sdim (__v2di)(__m128i)(C), (int)(imm), \ 5386341825Sdim (__mmask8)-1, (int)(R)) 5387309124Sdim 5388341825Sdim#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \ 5389309124Sdim (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5390309124Sdim (__v2df)(__m128d)(B), \ 5391309124Sdim (__v2di)(__m128i)(C), (int)(imm), \ 5392341825Sdim (__mmask8)(U), (int)(R)) 5393309124Sdim 5394341825Sdim#define _mm_fixupimm_sd(A, B, C, imm) \ 5395309124Sdim (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5396309124Sdim (__v2df)(__m128d)(B), \ 5397309124Sdim (__v2di)(__m128i)(C), (int)(imm), \ 5398309124Sdim (__mmask8)-1, \ 5399341825Sdim _MM_FROUND_CUR_DIRECTION) 5400309124Sdim 5401341825Sdim#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \ 5402309124Sdim (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5403309124Sdim (__v2df)(__m128d)(B), \ 5404309124Sdim (__v2di)(__m128i)(C), (int)(imm), \ 5405309124Sdim (__mmask8)(U), \ 5406341825Sdim _MM_FROUND_CUR_DIRECTION) 5407309124Sdim 5408341825Sdim#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \ 5409309124Sdim (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 5410309124Sdim (__v2df)(__m128d)(B), \ 5411309124Sdim (__v2di)(__m128i)(C), 
(int)(imm), \ 5412341825Sdim (__mmask8)(U), (int)(R)) 5413309124Sdim 5414341825Sdim#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \ 5415309124Sdim (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 5416309124Sdim (__v2df)(__m128d)(B), \ 5417309124Sdim (__v2di)(__m128i)(C), (int)(imm), \ 5418309124Sdim (__mmask8)(U), \ 5419341825Sdim _MM_FROUND_CUR_DIRECTION) 5420309124Sdim 5421341825Sdim#define _mm_fixupimm_round_ss(A, B, C, imm, R) \ 5422309124Sdim (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5423309124Sdim (__v4sf)(__m128)(B), \ 5424309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5425341825Sdim (__mmask8)-1, (int)(R)) 5426309124Sdim 5427341825Sdim#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \ 5428309124Sdim (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5429309124Sdim (__v4sf)(__m128)(B), \ 5430309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5431341825Sdim (__mmask8)(U), (int)(R)) 5432309124Sdim 5433341825Sdim#define _mm_fixupimm_ss(A, B, C, imm) \ 5434309124Sdim (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5435309124Sdim (__v4sf)(__m128)(B), \ 5436309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5437309124Sdim (__mmask8)-1, \ 5438341825Sdim _MM_FROUND_CUR_DIRECTION) 5439309124Sdim 5440341825Sdim#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \ 5441309124Sdim (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5442309124Sdim (__v4sf)(__m128)(B), \ 5443309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5444309124Sdim (__mmask8)(U), \ 5445341825Sdim _MM_FROUND_CUR_DIRECTION) 5446309124Sdim 5447341825Sdim#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \ 5448309124Sdim (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 5449309124Sdim (__v4sf)(__m128)(B), \ 5450309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5451341825Sdim (__mmask8)(U), (int)(R)) 5452309124Sdim 5453341825Sdim#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \ 5454309124Sdim 
(__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 5455309124Sdim (__v4sf)(__m128)(B), \ 5456309124Sdim (__v4si)(__m128i)(C), (int)(imm), \ 5457309124Sdim (__mmask8)(U), \ 5458341825Sdim _MM_FROUND_CUR_DIRECTION) 5459309124Sdim 5460341825Sdim#define _mm_getexp_round_sd(A, B, R) \ 5461309124Sdim (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5462309124Sdim (__v2df)(__m128d)(B), \ 5463309124Sdim (__v2df)_mm_setzero_pd(), \ 5464341825Sdim (__mmask8)-1, (int)(R)) 5465309124Sdim 5466309124Sdim 5467341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 5468309124Sdim_mm_getexp_sd (__m128d __A, __m128d __B) 5469309124Sdim{ 5470309124Sdim return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A, 5471309124Sdim (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); 5472309124Sdim} 5473309124Sdim 5474341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 5475309124Sdim_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 5476309124Sdim{ 5477309124Sdim return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, 5478309124Sdim (__v2df) __B, 5479309124Sdim (__v2df) __W, 5480309124Sdim (__mmask8) __U, 5481309124Sdim _MM_FROUND_CUR_DIRECTION); 5482309124Sdim} 5483309124Sdim 5484341825Sdim#define _mm_mask_getexp_round_sd(W, U, A, B, R) \ 5485309124Sdim (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5486309124Sdim (__v2df)(__m128d)(B), \ 5487309124Sdim (__v2df)(__m128d)(W), \ 5488341825Sdim (__mmask8)(U), (int)(R)) 5489309124Sdim 5490341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 5491309124Sdim_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) 5492309124Sdim{ 5493309124Sdim return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, 5494309124Sdim (__v2df) __B, 5495309124Sdim (__v2df) _mm_setzero_pd (), 5496309124Sdim (__mmask8) __U, 5497309124Sdim _MM_FROUND_CUR_DIRECTION); 5498309124Sdim} 5499309124Sdim 
5500341825Sdim#define _mm_maskz_getexp_round_sd(U, A, B, R) \ 5501309124Sdim (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5502309124Sdim (__v2df)(__m128d)(B), \ 5503309124Sdim (__v2df)_mm_setzero_pd(), \ 5504341825Sdim (__mmask8)(U), (int)(R)) 5505309124Sdim 5506341825Sdim#define _mm_getexp_round_ss(A, B, R) \ 5507309124Sdim (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5508309124Sdim (__v4sf)(__m128)(B), \ 5509309124Sdim (__v4sf)_mm_setzero_ps(), \ 5510341825Sdim (__mmask8)-1, (int)(R)) 5511309124Sdim 5512341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 5513309124Sdim_mm_getexp_ss (__m128 __A, __m128 __B) 5514309124Sdim{ 5515309124Sdim return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 5516309124Sdim (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); 5517309124Sdim} 5518309124Sdim 5519341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 5520309124Sdim_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 5521309124Sdim{ 5522309124Sdim return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 5523309124Sdim (__v4sf) __B, 5524309124Sdim (__v4sf) __W, 5525309124Sdim (__mmask8) __U, 5526309124Sdim _MM_FROUND_CUR_DIRECTION); 5527309124Sdim} 5528309124Sdim 5529341825Sdim#define _mm_mask_getexp_round_ss(W, U, A, B, R) \ 5530309124Sdim (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5531309124Sdim (__v4sf)(__m128)(B), \ 5532309124Sdim (__v4sf)(__m128)(W), \ 5533341825Sdim (__mmask8)(U), (int)(R)) 5534309124Sdim 5535341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 5536309124Sdim_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) 5537309124Sdim{ 5538309124Sdim return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 5539309124Sdim (__v4sf) __B, 5540341825Sdim (__v4sf) _mm_setzero_ps (), 5541309124Sdim (__mmask8) __U, 5542309124Sdim _MM_FROUND_CUR_DIRECTION); 5543309124Sdim} 5544309124Sdim 
/* Zero-masked scalar single-precision GETEXP with an explicit rounding
 * immediate R: passes a zero vector as the pass-through operand and U as the
 * mask.  The expansion is fully parenthesized so the leading cast cannot be
 * split from the call by operators at the use site.
 */
#define _mm_maskz_getexp_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)(U), (int)(R)))

/* Scalar double-precision GETMANT macros.
 *
 * All forms call __builtin_ia32_getmantsd_round_mask with operands
 * (A, B, imm, pass-through vector, mask, rounding immediate).  The two
 * selector arguments C and D are packed into one immediate as
 * ((D) << 2) | (C).
 *   - unmasked forms pass a zero vector and an all-ones mask,
 *   - "mask" forms pass W and U,
 *   - "maskz" forms pass a zero vector and U.
 * Forms without "_round" pass _MM_FROUND_CUR_DIRECTION as the rounding
 * immediate; "_round" forms pass R.
 */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))

#define _mm_getmant_sd(A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

/* Zero-masked scalar double-precision GETMANT with an explicit rounding
 * immediate R.  C and D are packed into one immediate as ((D) << 2) | (C);
 * a zero vector is passed as the pass-through operand and U as the mask.
 * As with every macro below, the expansion is fully parenthesized so the
 * leading cast cannot be split from the call by operators at the use site.
 */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(R)))

/* Scalar single-precision GETMANT macros.
 *
 * All forms call __builtin_ia32_getmantss_round_mask with operands
 * (A, B, imm, pass-through vector, mask, rounding immediate), where imm is
 * ((D) << 2) | (C).
 *   - unmasked forms pass a zero vector and an all-ones mask,
 *   - "mask" forms pass W and U,
 *   - "maskz" forms pass a zero vector and U.
 * Forms without "_round" pass _MM_FROUND_CUR_DIRECTION; "_round" forms
 * pass R.
 */
#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_getmant_ss(A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))
5633309124Sdim 5634341825Sdim#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \ 5635309124Sdim (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5636309124Sdim (__v4sf)(__m128)(B), \ 5637309124Sdim (int)(((D)<<2) | (C)), \ 5638309124Sdim (__v4sf)_mm_setzero_ps(), \ 5639341825Sdim (__mmask8)(U), (int)(R)) 5640309124Sdim 5641344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 5642309124Sdim_mm512_kmov (__mmask16 __A) 5643309124Sdim{ 5644309124Sdim return __A; 5645309124Sdim} 5646309124Sdim 5647341825Sdim#define _mm_comi_round_sd(A, B, P, R) \ 5648309124Sdim (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \ 5649341825Sdim (int)(P), (int)(R)) 5650309124Sdim 5651341825Sdim#define _mm_comi_round_ss(A, B, P, R) \ 5652309124Sdim (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ 5653341825Sdim (int)(P), (int)(R)) 5654309124Sdim 5655314564Sdim#ifdef __x86_64__ 5656341825Sdim#define _mm_cvt_roundsd_si64(A, R) \ 5657341825Sdim (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)) 5658314564Sdim#endif 5659309124Sdim 5660341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5661314564Sdim_mm512_sll_epi32(__m512i __A, __m128i __B) 5662309124Sdim{ 5663314564Sdim return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B); 5664309124Sdim} 5665309124Sdim 5666341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5667314564Sdim_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 5668309124Sdim{ 5669314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5670314564Sdim (__v16si)_mm512_sll_epi32(__A, __B), 5671314564Sdim (__v16si)__W); 5672309124Sdim} 5673309124Sdim 5674341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5675314564Sdim_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) 5676309124Sdim{ 5677314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5678314564Sdim (__v16si)_mm512_sll_epi32(__A, __B), 
5679314564Sdim (__v16si)_mm512_setzero_si512()); 5680309124Sdim} 5681309124Sdim 5682341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5683314564Sdim_mm512_sll_epi64(__m512i __A, __m128i __B) 5684309124Sdim{ 5685314564Sdim return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B); 5686309124Sdim} 5687309124Sdim 5688341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5689314564Sdim_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 5690309124Sdim{ 5691314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5692314564Sdim (__v8di)_mm512_sll_epi64(__A, __B), 5693314564Sdim (__v8di)__W); 5694309124Sdim} 5695309124Sdim 5696341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5697314564Sdim_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) 5698309124Sdim{ 5699314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5700314564Sdim (__v8di)_mm512_sll_epi64(__A, __B), 5701314564Sdim (__v8di)_mm512_setzero_si512()); 5702309124Sdim} 5703309124Sdim 5704341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5705314564Sdim_mm512_sllv_epi32(__m512i __X, __m512i __Y) 5706309124Sdim{ 5707314564Sdim return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y); 5708309124Sdim} 5709309124Sdim 5710341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5711314564Sdim_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 5712309124Sdim{ 5713314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5714314564Sdim (__v16si)_mm512_sllv_epi32(__X, __Y), 5715314564Sdim (__v16si)__W); 5716309124Sdim} 5717309124Sdim 5718341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5719314564Sdim_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) 5720309124Sdim{ 5721314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5722314564Sdim (__v16si)_mm512_sllv_epi32(__X, __Y), 5723314564Sdim (__v16si)_mm512_setzero_si512()); 5724309124Sdim} 
5725309124Sdim 5726341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5727314564Sdim_mm512_sllv_epi64(__m512i __X, __m512i __Y) 5728309124Sdim{ 5729314564Sdim return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y); 5730309124Sdim} 5731309124Sdim 5732341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5733314564Sdim_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 5734309124Sdim{ 5735314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5736314564Sdim (__v8di)_mm512_sllv_epi64(__X, __Y), 5737314564Sdim (__v8di)__W); 5738309124Sdim} 5739309124Sdim 5740341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5741314564Sdim_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) 5742309124Sdim{ 5743314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5744314564Sdim (__v8di)_mm512_sllv_epi64(__X, __Y), 5745314564Sdim (__v8di)_mm512_setzero_si512()); 5746309124Sdim} 5747309124Sdim 5748341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5749314564Sdim_mm512_sra_epi32(__m512i __A, __m128i __B) 5750309124Sdim{ 5751314564Sdim return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B); 5752309124Sdim} 5753309124Sdim 5754341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5755314564Sdim_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 5756309124Sdim{ 5757314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5758314564Sdim (__v16si)_mm512_sra_epi32(__A, __B), 5759314564Sdim (__v16si)__W); 5760309124Sdim} 5761309124Sdim 5762341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5763314564Sdim_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) 5764309124Sdim{ 5765314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5766314564Sdim (__v16si)_mm512_sra_epi32(__A, __B), 5767314564Sdim (__v16si)_mm512_setzero_si512()); 5768309124Sdim} 5769309124Sdim 5770341825Sdimstatic __inline__ __m512i 
__DEFAULT_FN_ATTRS512 5771314564Sdim_mm512_sra_epi64(__m512i __A, __m128i __B) 5772309124Sdim{ 5773314564Sdim return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B); 5774309124Sdim} 5775309124Sdim 5776341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5777314564Sdim_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 5778309124Sdim{ 5779314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5780314564Sdim (__v8di)_mm512_sra_epi64(__A, __B), 5781314564Sdim (__v8di)__W); 5782309124Sdim} 5783309124Sdim 5784341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5785314564Sdim_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) 5786309124Sdim{ 5787314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5788314564Sdim (__v8di)_mm512_sra_epi64(__A, __B), 5789314564Sdim (__v8di)_mm512_setzero_si512()); 5790309124Sdim} 5791309124Sdim 5792341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5793314564Sdim_mm512_srav_epi32(__m512i __X, __m512i __Y) 5794309124Sdim{ 5795314564Sdim return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y); 5796309124Sdim} 5797309124Sdim 5798341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5799314564Sdim_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 5800309124Sdim{ 5801314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5802314564Sdim (__v16si)_mm512_srav_epi32(__X, __Y), 5803314564Sdim (__v16si)__W); 5804309124Sdim} 5805309124Sdim 5806341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5807314564Sdim_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) 5808309124Sdim{ 5809314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5810314564Sdim (__v16si)_mm512_srav_epi32(__X, __Y), 5811314564Sdim (__v16si)_mm512_setzero_si512()); 5812309124Sdim} 5813309124Sdim 5814341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5815314564Sdim_mm512_srav_epi64(__m512i __X, 
__m512i __Y) 5816309124Sdim{ 5817314564Sdim return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y); 5818309124Sdim} 5819309124Sdim 5820341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5821314564Sdim_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 5822309124Sdim{ 5823314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5824314564Sdim (__v8di)_mm512_srav_epi64(__X, __Y), 5825314564Sdim (__v8di)__W); 5826309124Sdim} 5827309124Sdim 5828341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5829314564Sdim_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) 5830309124Sdim{ 5831314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5832314564Sdim (__v8di)_mm512_srav_epi64(__X, __Y), 5833314564Sdim (__v8di)_mm512_setzero_si512()); 5834309124Sdim} 5835309124Sdim 5836341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5837314564Sdim_mm512_srl_epi32(__m512i __A, __m128i __B) 5838309124Sdim{ 5839314564Sdim return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B); 5840309124Sdim} 5841309124Sdim 5842341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5843314564Sdim_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 5844309124Sdim{ 5845314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5846314564Sdim (__v16si)_mm512_srl_epi32(__A, __B), 5847314564Sdim (__v16si)__W); 5848309124Sdim} 5849309124Sdim 5850341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5851314564Sdim_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) 5852309124Sdim{ 5853314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5854314564Sdim (__v16si)_mm512_srl_epi32(__A, __B), 5855314564Sdim (__v16si)_mm512_setzero_si512()); 5856309124Sdim} 5857309124Sdim 5858341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5859314564Sdim_mm512_srl_epi64(__m512i __A, __m128i __B) 5860309124Sdim{ 5861314564Sdim return 
(__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B); 5862309124Sdim} 5863309124Sdim 5864341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5865314564Sdim_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 5866309124Sdim{ 5867314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5868314564Sdim (__v8di)_mm512_srl_epi64(__A, __B), 5869314564Sdim (__v8di)__W); 5870309124Sdim} 5871309124Sdim 5872341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5873314564Sdim_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) 5874309124Sdim{ 5875314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5876314564Sdim (__v8di)_mm512_srl_epi64(__A, __B), 5877314564Sdim (__v8di)_mm512_setzero_si512()); 5878309124Sdim} 5879309124Sdim 5880341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5881314564Sdim_mm512_srlv_epi32(__m512i __X, __m512i __Y) 5882309124Sdim{ 5883314564Sdim return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y); 5884309124Sdim} 5885309124Sdim 5886341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5887314564Sdim_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 5888309124Sdim{ 5889314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5890314564Sdim (__v16si)_mm512_srlv_epi32(__X, __Y), 5891314564Sdim (__v16si)__W); 5892309124Sdim} 5893309124Sdim 5894341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5895314564Sdim_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) 5896309124Sdim{ 5897314564Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5898314564Sdim (__v16si)_mm512_srlv_epi32(__X, __Y), 5899314564Sdim (__v16si)_mm512_setzero_si512()); 5900309124Sdim} 5901309124Sdim 5902341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5903309124Sdim_mm512_srlv_epi64 (__m512i __X, __m512i __Y) 5904309124Sdim{ 5905314564Sdim return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y); 
5906309124Sdim} 5907309124Sdim 5908341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5909314564Sdim_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 5910309124Sdim{ 5911314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5912314564Sdim (__v8di)_mm512_srlv_epi64(__X, __Y), 5913314564Sdim (__v8di)__W); 5914309124Sdim} 5915309124Sdim 5916341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 5917314564Sdim_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) 5918309124Sdim{ 5919314564Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5920314564Sdim (__v8di)_mm512_srlv_epi64(__X, __Y), 5921314564Sdim (__v8di)_mm512_setzero_si512()); 5922309124Sdim} 5923309124Sdim 5924341825Sdim#define _mm512_ternarylogic_epi32(A, B, C, imm) \ 5925309124Sdim (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ 5926309124Sdim (__v16si)(__m512i)(B), \ 5927309124Sdim (__v16si)(__m512i)(C), (int)(imm), \ 5928341825Sdim (__mmask16)-1) 5929309124Sdim 5930341825Sdim#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \ 5931309124Sdim (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ 5932309124Sdim (__v16si)(__m512i)(B), \ 5933309124Sdim (__v16si)(__m512i)(C), (int)(imm), \ 5934341825Sdim (__mmask16)(U)) 5935309124Sdim 5936341825Sdim#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \ 5937309124Sdim (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \ 5938309124Sdim (__v16si)(__m512i)(B), \ 5939309124Sdim (__v16si)(__m512i)(C), \ 5940341825Sdim (int)(imm), (__mmask16)(U)) 5941309124Sdim 5942341825Sdim#define _mm512_ternarylogic_epi64(A, B, C, imm) \ 5943309124Sdim (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ 5944309124Sdim (__v8di)(__m512i)(B), \ 5945309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 5946341825Sdim (__mmask8)-1) 5947309124Sdim 5948341825Sdim#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \ 5949309124Sdim 
(__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ 5950309124Sdim (__v8di)(__m512i)(B), \ 5951309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 5952341825Sdim (__mmask8)(U)) 5953309124Sdim 5954341825Sdim#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \ 5955309124Sdim (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \ 5956309124Sdim (__v8di)(__m512i)(B), \ 5957309124Sdim (__v8di)(__m512i)(C), (int)(imm), \ 5958341825Sdim (__mmask8)(U)) 5959309124Sdim 5960314564Sdim#ifdef __x86_64__ 5961341825Sdim#define _mm_cvt_roundsd_i64(A, R) \ 5962341825Sdim (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)) 5963314564Sdim#endif 5964309124Sdim 5965341825Sdim#define _mm_cvt_roundsd_si32(A, R) \ 5966341825Sdim (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)) 5967309124Sdim 5968341825Sdim#define _mm_cvt_roundsd_i32(A, R) \ 5969341825Sdim (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)) 5970309124Sdim 5971341825Sdim#define _mm_cvt_roundsd_u32(A, R) \ 5972341825Sdim (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)) 5973309124Sdim 5974341825Sdimstatic __inline__ unsigned __DEFAULT_FN_ATTRS128 5975309124Sdim_mm_cvtsd_u32 (__m128d __A) 5976309124Sdim{ 5977309124Sdim return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, 5978309124Sdim _MM_FROUND_CUR_DIRECTION); 5979309124Sdim} 5980309124Sdim 5981314564Sdim#ifdef __x86_64__ 5982341825Sdim#define _mm_cvt_roundsd_u64(A, R) \ 5983309124Sdim (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \ 5984341825Sdim (int)(R)) 5985309124Sdim 5986341825Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS128 5987309124Sdim_mm_cvtsd_u64 (__m128d __A) 5988309124Sdim{ 5989309124Sdim return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) 5990309124Sdim __A, 5991309124Sdim _MM_FROUND_CUR_DIRECTION); 5992309124Sdim} 5993314564Sdim#endif 5994309124Sdim 5995341825Sdim#define _mm_cvt_roundss_si32(A, R) \ 
5996341825Sdim (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)) 5997309124Sdim 5998341825Sdim#define _mm_cvt_roundss_i32(A, R) \ 5999341825Sdim (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)) 6000309124Sdim 6001314564Sdim#ifdef __x86_64__ 6002341825Sdim#define _mm_cvt_roundss_si64(A, R) \ 6003341825Sdim (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)) 6004309124Sdim 6005341825Sdim#define _mm_cvt_roundss_i64(A, R) \ 6006341825Sdim (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)) 6007314564Sdim#endif 6008309124Sdim 6009341825Sdim#define _mm_cvt_roundss_u32(A, R) \ 6010341825Sdim (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)) 6011309124Sdim 6012341825Sdimstatic __inline__ unsigned __DEFAULT_FN_ATTRS128 6013309124Sdim_mm_cvtss_u32 (__m128 __A) 6014309124Sdim{ 6015309124Sdim return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, 6016309124Sdim _MM_FROUND_CUR_DIRECTION); 6017309124Sdim} 6018309124Sdim 6019314564Sdim#ifdef __x86_64__ 6020341825Sdim#define _mm_cvt_roundss_u64(A, R) \ 6021309124Sdim (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \ 6022341825Sdim (int)(R)) 6023309124Sdim 6024341825Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS128 6025309124Sdim_mm_cvtss_u64 (__m128 __A) 6026309124Sdim{ 6027309124Sdim return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) 6028309124Sdim __A, 6029309124Sdim _MM_FROUND_CUR_DIRECTION); 6030309124Sdim} 6031314564Sdim#endif 6032309124Sdim 6033341825Sdim#define _mm_cvtt_roundsd_i32(A, R) \ 6034341825Sdim (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)) 6035309124Sdim 6036341825Sdim#define _mm_cvtt_roundsd_si32(A, R) \ 6037341825Sdim (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)) 6038309124Sdim 6039341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS128 6040309124Sdim_mm_cvttsd_i32 (__m128d __A) 6041309124Sdim{ 6042309124Sdim return (int) 
__builtin_ia32_vcvttsd2si32 ((__v2df) __A, 6043309124Sdim _MM_FROUND_CUR_DIRECTION); 6044309124Sdim} 6045309124Sdim 6046314564Sdim#ifdef __x86_64__ 6047341825Sdim#define _mm_cvtt_roundsd_si64(A, R) \ 6048341825Sdim (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)) 6049309124Sdim 6050341825Sdim#define _mm_cvtt_roundsd_i64(A, R) \ 6051341825Sdim (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)) 6052309124Sdim 6053341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS128 6054309124Sdim_mm_cvttsd_i64 (__m128d __A) 6055309124Sdim{ 6056309124Sdim return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, 6057309124Sdim _MM_FROUND_CUR_DIRECTION); 6058309124Sdim} 6059314564Sdim#endif 6060309124Sdim 6061341825Sdim#define _mm_cvtt_roundsd_u32(A, R) \ 6062341825Sdim (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)) 6063309124Sdim 6064341825Sdimstatic __inline__ unsigned __DEFAULT_FN_ATTRS128 6065309124Sdim_mm_cvttsd_u32 (__m128d __A) 6066309124Sdim{ 6067309124Sdim return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, 6068309124Sdim _MM_FROUND_CUR_DIRECTION); 6069309124Sdim} 6070309124Sdim 6071314564Sdim#ifdef __x86_64__ 6072341825Sdim#define _mm_cvtt_roundsd_u64(A, R) \ 6073309124Sdim (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \ 6074341825Sdim (int)(R)) 6075309124Sdim 6076341825Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS128 6077309124Sdim_mm_cvttsd_u64 (__m128d __A) 6078309124Sdim{ 6079309124Sdim return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) 6080309124Sdim __A, 6081309124Sdim _MM_FROUND_CUR_DIRECTION); 6082309124Sdim} 6083314564Sdim#endif 6084309124Sdim 6085341825Sdim#define _mm_cvtt_roundss_i32(A, R) \ 6086341825Sdim (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)) 6087309124Sdim 6088341825Sdim#define _mm_cvtt_roundss_si32(A, R) \ 6089341825Sdim (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)) 
6090309124Sdim 6091341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS128 6092309124Sdim_mm_cvttss_i32 (__m128 __A) 6093309124Sdim{ 6094309124Sdim return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, 6095309124Sdim _MM_FROUND_CUR_DIRECTION); 6096309124Sdim} 6097309124Sdim 6098314564Sdim#ifdef __x86_64__ 6099341825Sdim#define _mm_cvtt_roundss_i64(A, R) \ 6100341825Sdim (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)) 6101309124Sdim 6102341825Sdim#define _mm_cvtt_roundss_si64(A, R) \ 6103341825Sdim (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)) 6104309124Sdim 6105341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS128 6106309124Sdim_mm_cvttss_i64 (__m128 __A) 6107309124Sdim{ 6108309124Sdim return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, 6109309124Sdim _MM_FROUND_CUR_DIRECTION); 6110309124Sdim} 6111314564Sdim#endif 6112309124Sdim 6113341825Sdim#define _mm_cvtt_roundss_u32(A, R) \ 6114341825Sdim (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)) 6115309124Sdim 6116341825Sdimstatic __inline__ unsigned __DEFAULT_FN_ATTRS128 6117309124Sdim_mm_cvttss_u32 (__m128 __A) 6118309124Sdim{ 6119309124Sdim return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, 6120309124Sdim _MM_FROUND_CUR_DIRECTION); 6121309124Sdim} 6122309124Sdim 6123314564Sdim#ifdef __x86_64__ 6124341825Sdim#define _mm_cvtt_roundss_u64(A, R) \ 6125309124Sdim (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \ 6126341825Sdim (int)(R)) 6127309124Sdim 6128341825Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS128 6129309124Sdim_mm_cvttss_u64 (__m128 __A) 6130309124Sdim{ 6131309124Sdim return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) 6132309124Sdim __A, 6133309124Sdim _MM_FROUND_CUR_DIRECTION); 6134309124Sdim} 6135314564Sdim#endif 6136309124Sdim 6137341825Sdim#define _mm512_permute_pd(X, C) \ 6138341825Sdim (__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)) 
6139309124Sdim 6140341825Sdim#define _mm512_mask_permute_pd(W, U, X, C) \ 6141309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6142309124Sdim (__v8df)_mm512_permute_pd((X), (C)), \ 6143341825Sdim (__v8df)(__m512d)(W)) 6144309124Sdim 6145341825Sdim#define _mm512_maskz_permute_pd(U, X, C) \ 6146309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6147309124Sdim (__v8df)_mm512_permute_pd((X), (C)), \ 6148341825Sdim (__v8df)_mm512_setzero_pd()) 6149309124Sdim 6150341825Sdim#define _mm512_permute_ps(X, C) \ 6151341825Sdim (__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)) 6152309124Sdim 6153341825Sdim#define _mm512_mask_permute_ps(W, U, X, C) \ 6154309124Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6155309124Sdim (__v16sf)_mm512_permute_ps((X), (C)), \ 6156341825Sdim (__v16sf)(__m512)(W)) 6157309124Sdim 6158341825Sdim#define _mm512_maskz_permute_ps(U, X, C) \ 6159309124Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6160309124Sdim (__v16sf)_mm512_permute_ps((X), (C)), \ 6161341825Sdim (__v16sf)_mm512_setzero_ps()) 6162309124Sdim 6163341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 6164314564Sdim_mm512_permutevar_pd(__m512d __A, __m512i __C) 6165309124Sdim{ 6166314564Sdim return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C); 6167309124Sdim} 6168309124Sdim 6169341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 6170314564Sdim_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) 6171309124Sdim{ 6172314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 6173314564Sdim (__v8df)_mm512_permutevar_pd(__A, __C), 6174314564Sdim (__v8df)__W); 6175309124Sdim} 6176309124Sdim 6177341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 6178314564Sdim_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) 6179309124Sdim{ 6180314564Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 6181314564Sdim 
(__v8df)_mm512_permutevar_pd(__A, __C), 6182314564Sdim (__v8df)_mm512_setzero_pd()); 6183309124Sdim} 6184309124Sdim 6185341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 6186314564Sdim_mm512_permutevar_ps(__m512 __A, __m512i __C) 6187309124Sdim{ 6188314564Sdim return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C); 6189309124Sdim} 6190309124Sdim 6191341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 6192314564Sdim_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) 6193309124Sdim{ 6194314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 6195314564Sdim (__v16sf)_mm512_permutevar_ps(__A, __C), 6196314564Sdim (__v16sf)__W); 6197309124Sdim} 6198309124Sdim 6199341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 6200314564Sdim_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) 6201309124Sdim{ 6202314564Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 6203314564Sdim (__v16sf)_mm512_permutevar_ps(__A, __C), 6204314564Sdim (__v16sf)_mm512_setzero_ps()); 6205309124Sdim} 6206309124Sdim 6207341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512 6208309124Sdim_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) 6209309124Sdim{ 6210341825Sdim return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I, 6211341825Sdim (__v8df)__B); 6212309124Sdim} 6213309124Sdim 6214341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 6215341825Sdim_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B) 6216309124Sdim{ 6217341825Sdim return (__m512d)__builtin_ia32_selectpd_512(__U, 6218341825Sdim (__v8df)_mm512_permutex2var_pd(__A, __I, __B), 6219341825Sdim (__v8df)__A); 6220309124Sdim} 6221309124Sdim 6222341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 6223341825Sdim_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, 6224341825Sdim __m512d __B) 6225309124Sdim{ 6226341825Sdim return 
(__m512d)__builtin_ia32_selectpd_512(__U, 6227341825Sdim (__v8df)_mm512_permutex2var_pd(__A, __I, __B), 6228341825Sdim (__v8df)(__m512d)__I); 6229309124Sdim} 6230309124Sdim 6231341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 6232341825Sdim_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, 6233341825Sdim __m512d __B) 6234341825Sdim{ 6235341825Sdim return (__m512d)__builtin_ia32_selectpd_512(__U, 6236341825Sdim (__v8df)_mm512_permutex2var_pd(__A, __I, __B), 6237341825Sdim (__v8df)_mm512_setzero_pd()); 6238341825Sdim} 6239341825Sdim 6240341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512 6241309124Sdim_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) 6242309124Sdim{ 6243341825Sdim return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I, 6244341825Sdim (__v16sf) __B); 6245309124Sdim} 6246309124Sdim 6247341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 6248341825Sdim_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) 6249309124Sdim{ 6250341825Sdim return (__m512)__builtin_ia32_selectps_512(__U, 6251341825Sdim (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), 6252341825Sdim (__v16sf)__A); 6253309124Sdim} 6254309124Sdim 6255341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 6256341825Sdim_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B) 6257309124Sdim{ 6258341825Sdim return (__m512)__builtin_ia32_selectps_512(__U, 6259341825Sdim (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), 6260341825Sdim (__v16sf)(__m512)__I); 6261309124Sdim} 6262309124Sdim 6263341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 6264341825Sdim_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B) 6265341825Sdim{ 6266341825Sdim return (__m512)__builtin_ia32_selectps_512(__U, 6267341825Sdim (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), 6268341825Sdim (__v16sf)_mm512_setzero_ps()); 6269341825Sdim} 6270309124Sdim 6271341825Sdim 
6272341825Sdim#define _mm512_cvtt_roundpd_epu32(A, R) \ 6273309124Sdim (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6274309124Sdim (__v8si)_mm256_undefined_si256(), \ 6275341825Sdim (__mmask8)-1, (int)(R)) 6276309124Sdim 6277341825Sdim#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \ 6278309124Sdim (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6279309124Sdim (__v8si)(__m256i)(W), \ 6280341825Sdim (__mmask8)(U), (int)(R)) 6281309124Sdim 6282341825Sdim#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \ 6283309124Sdim (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6284309124Sdim (__v8si)_mm256_setzero_si256(), \ 6285341825Sdim (__mmask8)(U), (int)(R)) 6286309124Sdim 6287341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 6288309124Sdim_mm512_cvttpd_epu32 (__m512d __A) 6289309124Sdim{ 6290309124Sdim return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 6291309124Sdim (__v8si) 6292309124Sdim _mm256_undefined_si256 (), 6293309124Sdim (__mmask8) -1, 6294309124Sdim _MM_FROUND_CUR_DIRECTION); 6295309124Sdim} 6296309124Sdim 6297341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 6298309124Sdim_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 6299309124Sdim{ 6300309124Sdim return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 6301309124Sdim (__v8si) __W, 6302309124Sdim (__mmask8) __U, 6303309124Sdim _MM_FROUND_CUR_DIRECTION); 6304309124Sdim} 6305309124Sdim 6306341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 6307309124Sdim_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) 6308309124Sdim{ 6309309124Sdim return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 6310309124Sdim (__v8si) 6311309124Sdim _mm256_setzero_si256 (), 6312309124Sdim (__mmask8) __U, 6313309124Sdim _MM_FROUND_CUR_DIRECTION); 6314309124Sdim} 6315309124Sdim 6316341825Sdim#define _mm_roundscale_round_sd(A, B, imm, R) \ 6317309124Sdim 
(__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6318309124Sdim (__v2df)(__m128d)(B), \ 6319309124Sdim (__v2df)_mm_setzero_pd(), \ 6320309124Sdim (__mmask8)-1, (int)(imm), \ 6321341825Sdim (int)(R)) 6322309124Sdim 6323341825Sdim#define _mm_roundscale_sd(A, B, imm) \ 6324309124Sdim (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6325309124Sdim (__v2df)(__m128d)(B), \ 6326309124Sdim (__v2df)_mm_setzero_pd(), \ 6327309124Sdim (__mmask8)-1, (int)(imm), \ 6328341825Sdim _MM_FROUND_CUR_DIRECTION) 6329309124Sdim 6330341825Sdim#define _mm_mask_roundscale_sd(W, U, A, B, imm) \ 6331309124Sdim (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6332309124Sdim (__v2df)(__m128d)(B), \ 6333309124Sdim (__v2df)(__m128d)(W), \ 6334309124Sdim (__mmask8)(U), (int)(imm), \ 6335341825Sdim _MM_FROUND_CUR_DIRECTION) 6336309124Sdim 6337341825Sdim#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \ 6338309124Sdim (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6339309124Sdim (__v2df)(__m128d)(B), \ 6340309124Sdim (__v2df)(__m128d)(W), \ 6341309124Sdim (__mmask8)(U), (int)(I), \ 6342341825Sdim (int)(R)) 6343309124Sdim 6344341825Sdim#define _mm_maskz_roundscale_sd(U, A, B, I) \ 6345309124Sdim (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6346309124Sdim (__v2df)(__m128d)(B), \ 6347309124Sdim (__v2df)_mm_setzero_pd(), \ 6348309124Sdim (__mmask8)(U), (int)(I), \ 6349341825Sdim _MM_FROUND_CUR_DIRECTION) 6350309124Sdim 6351341825Sdim#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \ 6352309124Sdim (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6353309124Sdim (__v2df)(__m128d)(B), \ 6354309124Sdim (__v2df)_mm_setzero_pd(), \ 6355309124Sdim (__mmask8)(U), (int)(I), \ 6356341825Sdim (int)(R)) 6357309124Sdim 6358341825Sdim#define _mm_roundscale_round_ss(A, B, imm, R) \ 6359309124Sdim (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6360309124Sdim 
(__v4sf)(__m128)(B), \ 6361309124Sdim (__v4sf)_mm_setzero_ps(), \ 6362309124Sdim (__mmask8)-1, (int)(imm), \ 6363341825Sdim (int)(R)) 6364309124Sdim 6365341825Sdim#define _mm_roundscale_ss(A, B, imm) \ 6366309124Sdim (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6367309124Sdim (__v4sf)(__m128)(B), \ 6368309124Sdim (__v4sf)_mm_setzero_ps(), \ 6369309124Sdim (__mmask8)-1, (int)(imm), \ 6370341825Sdim _MM_FROUND_CUR_DIRECTION) 6371309124Sdim 6372341825Sdim#define _mm_mask_roundscale_ss(W, U, A, B, I) \ 6373309124Sdim (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6374309124Sdim (__v4sf)(__m128)(B), \ 6375309124Sdim (__v4sf)(__m128)(W), \ 6376309124Sdim (__mmask8)(U), (int)(I), \ 6377341825Sdim _MM_FROUND_CUR_DIRECTION) 6378309124Sdim 6379341825Sdim#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \ 6380309124Sdim (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6381309124Sdim (__v4sf)(__m128)(B), \ 6382309124Sdim (__v4sf)(__m128)(W), \ 6383309124Sdim (__mmask8)(U), (int)(I), \ 6384341825Sdim (int)(R)) 6385309124Sdim 6386341825Sdim#define _mm_maskz_roundscale_ss(U, A, B, I) \ 6387309124Sdim (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6388309124Sdim (__v4sf)(__m128)(B), \ 6389309124Sdim (__v4sf)_mm_setzero_ps(), \ 6390309124Sdim (__mmask8)(U), (int)(I), \ 6391341825Sdim _MM_FROUND_CUR_DIRECTION) 6392309124Sdim 6393341825Sdim#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \ 6394309124Sdim (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6395309124Sdim (__v4sf)(__m128)(B), \ 6396309124Sdim (__v4sf)_mm_setzero_ps(), \ 6397309124Sdim (__mmask8)(U), (int)(I), \ 6398341825Sdim (int)(R)) 6399309124Sdim 6400341825Sdim#define _mm512_scalef_round_pd(A, B, R) \ 6401309124Sdim (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6402309124Sdim (__v8df)(__m512d)(B), \ 6403309124Sdim (__v8df)_mm512_undefined_pd(), \ 6404341825Sdim (__mmask8)-1, (int)(R)) 
6405309124Sdim 6406341825Sdim#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \ 6407309124Sdim (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6408309124Sdim (__v8df)(__m512d)(B), \ 6409309124Sdim (__v8df)(__m512d)(W), \ 6410341825Sdim (__mmask8)(U), (int)(R)) 6411309124Sdim 6412341825Sdim#define _mm512_maskz_scalef_round_pd(U, A, B, R) \ 6413309124Sdim (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6414309124Sdim (__v8df)(__m512d)(B), \ 6415309124Sdim (__v8df)_mm512_setzero_pd(), \ 6416341825Sdim (__mmask8)(U), (int)(R)) 6417309124Sdim 6418341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 6419309124Sdim_mm512_scalef_pd (__m512d __A, __m512d __B) 6420309124Sdim{ 6421309124Sdim return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 6422309124Sdim (__v8df) __B, 6423309124Sdim (__v8df) 6424309124Sdim _mm512_undefined_pd (), 6425309124Sdim (__mmask8) -1, 6426309124Sdim _MM_FROUND_CUR_DIRECTION); 6427309124Sdim} 6428309124Sdim 6429341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 6430309124Sdim_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 6431309124Sdim{ 6432309124Sdim return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 6433309124Sdim (__v8df) __B, 6434309124Sdim (__v8df) __W, 6435309124Sdim (__mmask8) __U, 6436309124Sdim _MM_FROUND_CUR_DIRECTION); 6437309124Sdim} 6438309124Sdim 6439341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 6440309124Sdim_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) 6441309124Sdim{ 6442309124Sdim return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 6443309124Sdim (__v8df) __B, 6444309124Sdim (__v8df) 6445309124Sdim _mm512_setzero_pd (), 6446309124Sdim (__mmask8) __U, 6447309124Sdim _MM_FROUND_CUR_DIRECTION); 6448309124Sdim} 6449309124Sdim 6450341825Sdim#define _mm512_scalef_round_ps(A, B, R) \ 6451309124Sdim (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6452309124Sdim (__v16sf)(__m512)(B), 
\ 6453309124Sdim (__v16sf)_mm512_undefined_ps(), \ 6454341825Sdim (__mmask16)-1, (int)(R)) 6455309124Sdim 6456341825Sdim#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \ 6457309124Sdim (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6458309124Sdim (__v16sf)(__m512)(B), \ 6459309124Sdim (__v16sf)(__m512)(W), \ 6460341825Sdim (__mmask16)(U), (int)(R)) 6461309124Sdim 6462341825Sdim#define _mm512_maskz_scalef_round_ps(U, A, B, R) \ 6463309124Sdim (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6464309124Sdim (__v16sf)(__m512)(B), \ 6465309124Sdim (__v16sf)_mm512_setzero_ps(), \ 6466341825Sdim (__mmask16)(U), (int)(R)) 6467309124Sdim 6468341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 6469309124Sdim_mm512_scalef_ps (__m512 __A, __m512 __B) 6470309124Sdim{ 6471309124Sdim return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 6472309124Sdim (__v16sf) __B, 6473309124Sdim (__v16sf) 6474309124Sdim _mm512_undefined_ps (), 6475309124Sdim (__mmask16) -1, 6476309124Sdim _MM_FROUND_CUR_DIRECTION); 6477309124Sdim} 6478309124Sdim 6479341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 6480309124Sdim_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 6481309124Sdim{ 6482309124Sdim return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 6483309124Sdim (__v16sf) __B, 6484309124Sdim (__v16sf) __W, 6485309124Sdim (__mmask16) __U, 6486309124Sdim _MM_FROUND_CUR_DIRECTION); 6487309124Sdim} 6488309124Sdim 6489341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 6490309124Sdim_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) 6491309124Sdim{ 6492309124Sdim return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 6493309124Sdim (__v16sf) __B, 6494309124Sdim (__v16sf) 6495309124Sdim _mm512_setzero_ps (), 6496309124Sdim (__mmask16) __U, 6497309124Sdim _MM_FROUND_CUR_DIRECTION); 6498309124Sdim} 6499309124Sdim 6500341825Sdim#define _mm_scalef_round_sd(A, B, R) \ 6501309124Sdim 
(__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6502309124Sdim (__v2df)(__m128d)(B), \ 6503309124Sdim (__v2df)_mm_setzero_pd(), \ 6504341825Sdim (__mmask8)-1, (int)(R)) 6505309124Sdim 6506341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 6507309124Sdim_mm_scalef_sd (__m128d __A, __m128d __B) 6508309124Sdim{ 6509309124Sdim return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A, 6510309124Sdim (__v2df)( __B), (__v2df) _mm_setzero_pd(), 6511309124Sdim (__mmask8) -1, 6512309124Sdim _MM_FROUND_CUR_DIRECTION); 6513309124Sdim} 6514309124Sdim 6515341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 6516309124Sdim_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 6517309124Sdim{ 6518309124Sdim return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, 6519309124Sdim (__v2df) __B, 6520309124Sdim (__v2df) __W, 6521309124Sdim (__mmask8) __U, 6522309124Sdim _MM_FROUND_CUR_DIRECTION); 6523309124Sdim} 6524309124Sdim 6525341825Sdim#define _mm_mask_scalef_round_sd(W, U, A, B, R) \ 6526309124Sdim (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6527309124Sdim (__v2df)(__m128d)(B), \ 6528309124Sdim (__v2df)(__m128d)(W), \ 6529341825Sdim (__mmask8)(U), (int)(R)) 6530309124Sdim 6531341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 6532309124Sdim_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) 6533309124Sdim{ 6534309124Sdim return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, 6535309124Sdim (__v2df) __B, 6536309124Sdim (__v2df) _mm_setzero_pd (), 6537309124Sdim (__mmask8) __U, 6538309124Sdim _MM_FROUND_CUR_DIRECTION); 6539309124Sdim} 6540309124Sdim 6541341825Sdim#define _mm_maskz_scalef_round_sd(U, A, B, R) \ 6542309124Sdim (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6543309124Sdim (__v2df)(__m128d)(B), \ 6544309124Sdim (__v2df)_mm_setzero_pd(), \ 6545341825Sdim (__mmask8)(U), (int)(R)) 6546309124Sdim 6547341825Sdim#define 
_mm_scalef_round_ss(A, B, R) \ 6548309124Sdim (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6549309124Sdim (__v4sf)(__m128)(B), \ 6550309124Sdim (__v4sf)_mm_setzero_ps(), \ 6551341825Sdim (__mmask8)-1, (int)(R)) 6552309124Sdim 6553341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 6554309124Sdim_mm_scalef_ss (__m128 __A, __m128 __B) 6555309124Sdim{ 6556309124Sdim return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A, 6557309124Sdim (__v4sf)( __B), (__v4sf) _mm_setzero_ps(), 6558309124Sdim (__mmask8) -1, 6559309124Sdim _MM_FROUND_CUR_DIRECTION); 6560309124Sdim} 6561309124Sdim 6562341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 6563309124Sdim_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 6564309124Sdim{ 6565309124Sdim return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, 6566309124Sdim (__v4sf) __B, 6567309124Sdim (__v4sf) __W, 6568309124Sdim (__mmask8) __U, 6569309124Sdim _MM_FROUND_CUR_DIRECTION); 6570309124Sdim} 6571309124Sdim 6572341825Sdim#define _mm_mask_scalef_round_ss(W, U, A, B, R) \ 6573309124Sdim (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6574309124Sdim (__v4sf)(__m128)(B), \ 6575309124Sdim (__v4sf)(__m128)(W), \ 6576341825Sdim (__mmask8)(U), (int)(R)) 6577309124Sdim 6578341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 6579309124Sdim_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) 6580309124Sdim{ 6581309124Sdim return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, 6582309124Sdim (__v4sf) __B, 6583309124Sdim (__v4sf) _mm_setzero_ps (), 6584309124Sdim (__mmask8) __U, 6585309124Sdim _MM_FROUND_CUR_DIRECTION); 6586309124Sdim} 6587309124Sdim 6588341825Sdim#define _mm_maskz_scalef_round_ss(U, A, B, R) \ 6589309124Sdim (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6590309124Sdim (__v4sf)(__m128)(B), \ 6591309124Sdim (__v4sf)_mm_setzero_ps(), \ 6592309124Sdim (__mmask8)(U), \ 6593341825Sdim (int)(R)) 
6594309124Sdim 6595341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 6596314564Sdim_mm512_srai_epi32(__m512i __A, int __B) 6597314564Sdim{ 6598314564Sdim return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B); 6599314564Sdim} 6600309124Sdim 6601341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 6602314564Sdim_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) 6603314564Sdim{ 6604341825Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6605341825Sdim (__v16si)_mm512_srai_epi32(__A, __B), 6606314564Sdim (__v16si)__W); 6607314564Sdim} 6608309124Sdim 6609341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 6610314564Sdim_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) { 6611341825Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6612341825Sdim (__v16si)_mm512_srai_epi32(__A, __B), 6613314564Sdim (__v16si)_mm512_setzero_si512()); 6614314564Sdim} 6615309124Sdim 6616341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 6617314564Sdim_mm512_srai_epi64(__m512i __A, int __B) 6618314564Sdim{ 6619314564Sdim return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B); 6620314564Sdim} 6621309124Sdim 6622341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 6623314564Sdim_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) 6624314564Sdim{ 6625341825Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6626341825Sdim (__v8di)_mm512_srai_epi64(__A, __B), 6627314564Sdim (__v8di)__W); 6628314564Sdim} 6629309124Sdim 6630341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 6631314564Sdim_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B) 6632314564Sdim{ 6633341825Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6634341825Sdim (__v8di)_mm512_srai_epi64(__A, __B), 6635314564Sdim (__v8di)_mm512_setzero_si512()); 6636314564Sdim} 6637309124Sdim 6638341825Sdim#define _mm512_shuffle_f32x4(A, B, imm) \ 6639341825Sdim 
(__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \ 6640341825Sdim (__v16sf)(__m512)(B), (int)(imm)) 6641309124Sdim 6642341825Sdim#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \ 6643327952Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6644327952Sdim (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ 6645341825Sdim (__v16sf)(__m512)(W)) 6646309124Sdim 6647341825Sdim#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \ 6648327952Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6649327952Sdim (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ 6650341825Sdim (__v16sf)_mm512_setzero_ps()) 6651309124Sdim 6652341825Sdim#define _mm512_shuffle_f64x2(A, B, imm) \ 6653341825Sdim (__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \ 6654341825Sdim (__v8df)(__m512d)(B), (int)(imm)) 6655309124Sdim 6656341825Sdim#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \ 6657327952Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6658327952Sdim (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ 6659341825Sdim (__v8df)(__m512d)(W)) 6660309124Sdim 6661341825Sdim#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \ 6662327952Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6663327952Sdim (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ 6664341825Sdim (__v8df)_mm512_setzero_pd()) 6665309124Sdim 6666341825Sdim#define _mm512_shuffle_i32x4(A, B, imm) \ 6667341825Sdim (__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \ 6668341825Sdim (__v16si)(__m512i)(B), (int)(imm)) 6669309124Sdim 6670341825Sdim#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \ 6671327952Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 6672327952Sdim (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ 6673341825Sdim (__v16si)(__m512i)(W)) 6674309124Sdim 6675341825Sdim#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \ 6676327952Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 6677327952Sdim (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ 
6678341825Sdim (__v16si)_mm512_setzero_si512()) 6679309124Sdim 6680341825Sdim#define _mm512_shuffle_i64x2(A, B, imm) \ 6681341825Sdim (__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \ 6682341825Sdim (__v8di)(__m512i)(B), (int)(imm)) 6683309124Sdim 6684341825Sdim#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \ 6685327952Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 6686327952Sdim (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ 6687341825Sdim (__v8di)(__m512i)(W)) 6688309124Sdim 6689341825Sdim#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \ 6690327952Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 6691327952Sdim (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ 6692341825Sdim (__v8di)_mm512_setzero_si512()) 6693309124Sdim 6694341825Sdim#define _mm512_shuffle_pd(A, B, M) \ 6695341825Sdim (__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \ 6696341825Sdim (__v8df)(__m512d)(B), (int)(M)) 6697309124Sdim 6698341825Sdim#define _mm512_mask_shuffle_pd(W, U, A, B, M) \ 6699309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6700309124Sdim (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ 6701341825Sdim (__v8df)(__m512d)(W)) 6702309124Sdim 6703341825Sdim#define _mm512_maskz_shuffle_pd(U, A, B, M) \ 6704309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6705309124Sdim (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ 6706341825Sdim (__v8df)_mm512_setzero_pd()) 6707309124Sdim 6708341825Sdim#define _mm512_shuffle_ps(A, B, M) \ 6709341825Sdim (__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \ 6710341825Sdim (__v16sf)(__m512)(B), (int)(M)) 6711309124Sdim 6712341825Sdim#define _mm512_mask_shuffle_ps(W, U, A, B, M) \ 6713309124Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6714309124Sdim (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ 6715341825Sdim (__v16sf)(__m512)(W)) 6716309124Sdim 6717341825Sdim#define _mm512_maskz_shuffle_ps(U, A, B, M) \ 6718309124Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 
6719309124Sdim (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ 6720341825Sdim (__v16sf)_mm512_setzero_ps()) 6721309124Sdim 6722341825Sdim#define _mm_sqrt_round_sd(A, B, R) \ 6723309124Sdim (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 6724309124Sdim (__v2df)(__m128d)(B), \ 6725309124Sdim (__v2df)_mm_setzero_pd(), \ 6726341825Sdim (__mmask8)-1, (int)(R)) 6727309124Sdim 6728341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 6729309124Sdim_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 6730309124Sdim{ 6731309124Sdim return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, 6732309124Sdim (__v2df) __B, 6733309124Sdim (__v2df) __W, 6734309124Sdim (__mmask8) __U, 6735309124Sdim _MM_FROUND_CUR_DIRECTION); 6736309124Sdim} 6737309124Sdim 6738341825Sdim#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \ 6739309124Sdim (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 6740309124Sdim (__v2df)(__m128d)(B), \ 6741309124Sdim (__v2df)(__m128d)(W), \ 6742341825Sdim (__mmask8)(U), (int)(R)) 6743309124Sdim 6744341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 6745309124Sdim_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) 6746309124Sdim{ 6747309124Sdim return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, 6748309124Sdim (__v2df) __B, 6749309124Sdim (__v2df) _mm_setzero_pd (), 6750309124Sdim (__mmask8) __U, 6751309124Sdim _MM_FROUND_CUR_DIRECTION); 6752309124Sdim} 6753309124Sdim 6754341825Sdim#define _mm_maskz_sqrt_round_sd(U, A, B, R) \ 6755309124Sdim (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 6756309124Sdim (__v2df)(__m128d)(B), \ 6757309124Sdim (__v2df)_mm_setzero_pd(), \ 6758341825Sdim (__mmask8)(U), (int)(R)) 6759309124Sdim 6760341825Sdim#define _mm_sqrt_round_ss(A, B, R) \ 6761309124Sdim (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 6762309124Sdim (__v4sf)(__m128)(B), \ 6763309124Sdim (__v4sf)_mm_setzero_ps(), \ 6764341825Sdim (__mmask8)-1, 
(int)(R)) 6765309124Sdim 6766341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 6767309124Sdim_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 6768309124Sdim{ 6769309124Sdim return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, 6770309124Sdim (__v4sf) __B, 6771309124Sdim (__v4sf) __W, 6772309124Sdim (__mmask8) __U, 6773309124Sdim _MM_FROUND_CUR_DIRECTION); 6774309124Sdim} 6775309124Sdim 6776341825Sdim#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \ 6777309124Sdim (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 6778309124Sdim (__v4sf)(__m128)(B), \ 6779309124Sdim (__v4sf)(__m128)(W), (__mmask8)(U), \ 6780341825Sdim (int)(R)) 6781309124Sdim 6782341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 6783309124Sdim_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) 6784309124Sdim{ 6785309124Sdim return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, 6786309124Sdim (__v4sf) __B, 6787309124Sdim (__v4sf) _mm_setzero_ps (), 6788309124Sdim (__mmask8) __U, 6789309124Sdim _MM_FROUND_CUR_DIRECTION); 6790309124Sdim} 6791309124Sdim 6792341825Sdim#define _mm_maskz_sqrt_round_ss(U, A, B, R) \ 6793309124Sdim (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 6794309124Sdim (__v4sf)(__m128)(B), \ 6795309124Sdim (__v4sf)_mm_setzero_ps(), \ 6796341825Sdim (__mmask8)(U), (int)(R)) 6797309124Sdim 6798341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 6799321369Sdim_mm512_broadcast_f32x4(__m128 __A) 6800309124Sdim{ 6801321369Sdim return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 6802321369Sdim 0, 1, 2, 3, 0, 1, 2, 3, 6803321369Sdim 0, 1, 2, 3, 0, 1, 2, 3); 6804309124Sdim} 6805309124Sdim 6806341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 6807321369Sdim_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) 6808309124Sdim{ 6809321369Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, 6810321369Sdim (__v16sf)_mm512_broadcast_f32x4(__A), 6811321369Sdim 
(__v16sf)__O); 6812309124Sdim} 6813309124Sdim 6814341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 6815321369Sdim_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) 6816309124Sdim{ 6817321369Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, 6818321369Sdim (__v16sf)_mm512_broadcast_f32x4(__A), 6819321369Sdim (__v16sf)_mm512_setzero_ps()); 6820309124Sdim} 6821309124Sdim 6822341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 6823321369Sdim_mm512_broadcast_f64x4(__m256d __A) 6824309124Sdim{ 6825321369Sdim return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A, 6826321369Sdim 0, 1, 2, 3, 0, 1, 2, 3); 6827309124Sdim} 6828309124Sdim 6829341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 6830321369Sdim_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A) 6831309124Sdim{ 6832321369Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, 6833321369Sdim (__v8df)_mm512_broadcast_f64x4(__A), 6834321369Sdim (__v8df)__O); 6835309124Sdim} 6836309124Sdim 6837341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 6838321369Sdim_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A) 6839309124Sdim{ 6840321369Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, 6841321369Sdim (__v8df)_mm512_broadcast_f64x4(__A), 6842321369Sdim (__v8df)_mm512_setzero_pd()); 6843309124Sdim} 6844309124Sdim 6845341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 6846321369Sdim_mm512_broadcast_i32x4(__m128i __A) 6847309124Sdim{ 6848321369Sdim return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 6849321369Sdim 0, 1, 2, 3, 0, 1, 2, 3, 6850321369Sdim 0, 1, 2, 3, 0, 1, 2, 3); 6851309124Sdim} 6852309124Sdim 6853341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 6854321369Sdim_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) 6855309124Sdim{ 6856321369Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 6857321369Sdim (__v16si)_mm512_broadcast_i32x4(__A), 6858321369Sdim 
(__v16si)__O); 6859309124Sdim} 6860309124Sdim 6861341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 6862321369Sdim_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) 6863309124Sdim{ 6864321369Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 6865321369Sdim (__v16si)_mm512_broadcast_i32x4(__A), 6866321369Sdim (__v16si)_mm512_setzero_si512()); 6867309124Sdim} 6868309124Sdim 6869341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 6870321369Sdim_mm512_broadcast_i64x4(__m256i __A) 6871309124Sdim{ 6872321369Sdim return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A, 6873321369Sdim 0, 1, 2, 3, 0, 1, 2, 3); 6874309124Sdim} 6875309124Sdim 6876341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 6877321369Sdim_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A) 6878309124Sdim{ 6879321369Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 6880321369Sdim (__v8di)_mm512_broadcast_i64x4(__A), 6881321369Sdim (__v8di)__O); 6882309124Sdim} 6883309124Sdim 6884341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 6885321369Sdim_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A) 6886309124Sdim{ 6887321369Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 6888321369Sdim (__v8di)_mm512_broadcast_i64x4(__A), 6889321369Sdim (__v8di)_mm512_setzero_si512()); 6890309124Sdim} 6891309124Sdim 6892341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 6893309124Sdim_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) 6894309124Sdim{ 6895309124Sdim return (__m512d)__builtin_ia32_selectpd_512(__M, 6896309124Sdim (__v8df) _mm512_broadcastsd_pd(__A), 6897309124Sdim (__v8df) __O); 6898309124Sdim} 6899309124Sdim 6900341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 6901309124Sdim_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 6902309124Sdim{ 6903309124Sdim return (__m512d)__builtin_ia32_selectpd_512(__M, 6904309124Sdim (__v8df) _mm512_broadcastsd_pd(__A), 6905309124Sdim 
(__v8df) _mm512_setzero_pd()); 6906309124Sdim} 6907309124Sdim 6908341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 6909309124Sdim_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) 6910309124Sdim{ 6911309124Sdim return (__m512)__builtin_ia32_selectps_512(__M, 6912309124Sdim (__v16sf) _mm512_broadcastss_ps(__A), 6913309124Sdim (__v16sf) __O); 6914309124Sdim} 6915309124Sdim 6916341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 6917309124Sdim_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) 6918309124Sdim{ 6919309124Sdim return (__m512)__builtin_ia32_selectps_512(__M, 6920309124Sdim (__v16sf) _mm512_broadcastss_ps(__A), 6921309124Sdim (__v16sf) _mm512_setzero_ps()); 6922309124Sdim} 6923309124Sdim 6924341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 6925309124Sdim_mm512_cvtsepi32_epi8 (__m512i __A) 6926309124Sdim{ 6927309124Sdim return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 6928309124Sdim (__v16qi) _mm_undefined_si128 (), 6929309124Sdim (__mmask16) -1); 6930309124Sdim} 6931309124Sdim 6932341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 6933309124Sdim_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 6934309124Sdim{ 6935309124Sdim return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 6936309124Sdim (__v16qi) __O, __M); 6937309124Sdim} 6938309124Sdim 6939341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 6940309124Sdim_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A) 6941309124Sdim{ 6942309124Sdim return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 6943309124Sdim (__v16qi) _mm_setzero_si128 (), 6944309124Sdim __M); 6945309124Sdim} 6946309124Sdim 6947341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 6948309124Sdim_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 6949309124Sdim{ 6950309124Sdim __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 6951309124Sdim} 6952309124Sdim 6953341825Sdimstatic 
__inline__ __m256i __DEFAULT_FN_ATTRS512 6954309124Sdim_mm512_cvtsepi32_epi16 (__m512i __A) 6955309124Sdim{ 6956309124Sdim return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 6957309124Sdim (__v16hi) _mm256_undefined_si256 (), 6958309124Sdim (__mmask16) -1); 6959309124Sdim} 6960309124Sdim 6961341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 6962309124Sdim_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 6963309124Sdim{ 6964309124Sdim return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 6965309124Sdim (__v16hi) __O, __M); 6966309124Sdim} 6967309124Sdim 6968341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 6969309124Sdim_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A) 6970309124Sdim{ 6971309124Sdim return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 6972309124Sdim (__v16hi) _mm256_setzero_si256 (), 6973309124Sdim __M); 6974309124Sdim} 6975309124Sdim 6976341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 6977309124Sdim_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 6978309124Sdim{ 6979309124Sdim __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 6980309124Sdim} 6981309124Sdim 6982341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 6983309124Sdim_mm512_cvtsepi64_epi8 (__m512i __A) 6984309124Sdim{ 6985309124Sdim return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 6986309124Sdim (__v16qi) _mm_undefined_si128 (), 6987309124Sdim (__mmask8) -1); 6988309124Sdim} 6989309124Sdim 6990341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 6991309124Sdim_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 6992309124Sdim{ 6993309124Sdim return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 6994309124Sdim (__v16qi) __O, __M); 6995309124Sdim} 6996309124Sdim 6997341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 6998309124Sdim_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A) 6999309124Sdim{ 
7000309124Sdim return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 7001309124Sdim (__v16qi) _mm_setzero_si128 (), 7002309124Sdim __M); 7003309124Sdim} 7004309124Sdim 7005341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 7006309124Sdim_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 7007309124Sdim{ 7008309124Sdim __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 7009309124Sdim} 7010309124Sdim 7011341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 7012309124Sdim_mm512_cvtsepi64_epi32 (__m512i __A) 7013309124Sdim{ 7014309124Sdim return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 7015309124Sdim (__v8si) _mm256_undefined_si256 (), 7016309124Sdim (__mmask8) -1); 7017309124Sdim} 7018309124Sdim 7019341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 7020309124Sdim_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 7021309124Sdim{ 7022309124Sdim return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 7023309124Sdim (__v8si) __O, __M); 7024309124Sdim} 7025309124Sdim 7026341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 7027309124Sdim_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A) 7028309124Sdim{ 7029309124Sdim return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 7030309124Sdim (__v8si) _mm256_setzero_si256 (), 7031309124Sdim __M); 7032309124Sdim} 7033309124Sdim 7034341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 7035309124Sdim_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A) 7036309124Sdim{ 7037309124Sdim __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 7038309124Sdim} 7039309124Sdim 7040341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7041309124Sdim_mm512_cvtsepi64_epi16 (__m512i __A) 7042309124Sdim{ 7043309124Sdim return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 7044309124Sdim (__v8hi) _mm_undefined_si128 (), 7045309124Sdim (__mmask8) -1); 7046309124Sdim} 
7047309124Sdim 7048341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7049309124Sdim_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 7050309124Sdim{ 7051309124Sdim return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 7052309124Sdim (__v8hi) __O, __M); 7053309124Sdim} 7054309124Sdim 7055341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7056309124Sdim_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A) 7057309124Sdim{ 7058309124Sdim return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 7059309124Sdim (__v8hi) _mm_setzero_si128 (), 7060309124Sdim __M); 7061309124Sdim} 7062309124Sdim 7063341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 7064309124Sdim_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A) 7065309124Sdim{ 7066309124Sdim __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); 7067309124Sdim} 7068309124Sdim 7069341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7070309124Sdim_mm512_cvtusepi32_epi8 (__m512i __A) 7071309124Sdim{ 7072309124Sdim return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 7073309124Sdim (__v16qi) _mm_undefined_si128 (), 7074309124Sdim (__mmask16) -1); 7075309124Sdim} 7076309124Sdim 7077341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7078309124Sdim_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 7079309124Sdim{ 7080309124Sdim return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 7081309124Sdim (__v16qi) __O, 7082309124Sdim __M); 7083309124Sdim} 7084309124Sdim 7085341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7086309124Sdim_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) 7087309124Sdim{ 7088309124Sdim return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 7089309124Sdim (__v16qi) _mm_setzero_si128 (), 7090309124Sdim __M); 7091309124Sdim} 7092309124Sdim 7093341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 
7094309124Sdim_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 7095309124Sdim{ 7096309124Sdim __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 7097309124Sdim} 7098309124Sdim 7099341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 7100309124Sdim_mm512_cvtusepi32_epi16 (__m512i __A) 7101309124Sdim{ 7102309124Sdim return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 7103309124Sdim (__v16hi) _mm256_undefined_si256 (), 7104309124Sdim (__mmask16) -1); 7105309124Sdim} 7106309124Sdim 7107341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 7108309124Sdim_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 7109309124Sdim{ 7110309124Sdim return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 7111309124Sdim (__v16hi) __O, 7112309124Sdim __M); 7113309124Sdim} 7114309124Sdim 7115341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 7116309124Sdim_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) 7117309124Sdim{ 7118309124Sdim return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 7119309124Sdim (__v16hi) _mm256_setzero_si256 (), 7120309124Sdim __M); 7121309124Sdim} 7122309124Sdim 7123341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 7124309124Sdim_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 7125309124Sdim{ 7126309124Sdim __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 7127309124Sdim} 7128309124Sdim 7129341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7130309124Sdim_mm512_cvtusepi64_epi8 (__m512i __A) 7131309124Sdim{ 7132309124Sdim return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 7133309124Sdim (__v16qi) _mm_undefined_si128 (), 7134309124Sdim (__mmask8) -1); 7135309124Sdim} 7136309124Sdim 7137341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7138309124Sdim_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 7139309124Sdim{ 7140309124Sdim return 
(__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 7141309124Sdim (__v16qi) __O, 7142309124Sdim __M); 7143309124Sdim} 7144309124Sdim 7145341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7146309124Sdim_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A) 7147309124Sdim{ 7148309124Sdim return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 7149309124Sdim (__v16qi) _mm_setzero_si128 (), 7150309124Sdim __M); 7151309124Sdim} 7152309124Sdim 7153341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 7154309124Sdim_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 7155309124Sdim{ 7156309124Sdim __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 7157309124Sdim} 7158309124Sdim 7159341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 7160309124Sdim_mm512_cvtusepi64_epi32 (__m512i __A) 7161309124Sdim{ 7162309124Sdim return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 7163309124Sdim (__v8si) _mm256_undefined_si256 (), 7164309124Sdim (__mmask8) -1); 7165309124Sdim} 7166309124Sdim 7167341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 7168309124Sdim_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 7169309124Sdim{ 7170309124Sdim return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 7171309124Sdim (__v8si) __O, __M); 7172309124Sdim} 7173309124Sdim 7174341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 7175309124Sdim_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A) 7176309124Sdim{ 7177309124Sdim return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 7178309124Sdim (__v8si) _mm256_setzero_si256 (), 7179309124Sdim __M); 7180309124Sdim} 7181309124Sdim 7182341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 7183309124Sdim_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) 7184309124Sdim{ 7185309124Sdim __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M); 7186309124Sdim} 7187309124Sdim 
7188341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7189309124Sdim_mm512_cvtusepi64_epi16 (__m512i __A) 7190309124Sdim{ 7191309124Sdim return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 7192309124Sdim (__v8hi) _mm_undefined_si128 (), 7193309124Sdim (__mmask8) -1); 7194309124Sdim} 7195309124Sdim 7196341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7197309124Sdim_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 7198309124Sdim{ 7199309124Sdim return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 7200309124Sdim (__v8hi) __O, __M); 7201309124Sdim} 7202309124Sdim 7203341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7204309124Sdim_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A) 7205309124Sdim{ 7206309124Sdim return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 7207309124Sdim (__v8hi) _mm_setzero_si128 (), 7208309124Sdim __M); 7209309124Sdim} 7210309124Sdim 7211341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 7212309124Sdim_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) 7213309124Sdim{ 7214309124Sdim __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M); 7215309124Sdim} 7216309124Sdim 7217341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7218309124Sdim_mm512_cvtepi32_epi8 (__m512i __A) 7219309124Sdim{ 7220309124Sdim return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 7221309124Sdim (__v16qi) _mm_undefined_si128 (), 7222309124Sdim (__mmask16) -1); 7223309124Sdim} 7224309124Sdim 7225341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7226309124Sdim_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 7227309124Sdim{ 7228309124Sdim return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 7229309124Sdim (__v16qi) __O, __M); 7230309124Sdim} 7231309124Sdim 7232341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7233309124Sdim_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) 
7234309124Sdim{ 7235309124Sdim return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 7236309124Sdim (__v16qi) _mm_setzero_si128 (), 7237309124Sdim __M); 7238309124Sdim} 7239309124Sdim 7240341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 7241309124Sdim_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 7242309124Sdim{ 7243309124Sdim __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 7244309124Sdim} 7245309124Sdim 7246341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 7247309124Sdim_mm512_cvtepi32_epi16 (__m512i __A) 7248309124Sdim{ 7249309124Sdim return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 7250309124Sdim (__v16hi) _mm256_undefined_si256 (), 7251309124Sdim (__mmask16) -1); 7252309124Sdim} 7253309124Sdim 7254341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 7255309124Sdim_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 7256309124Sdim{ 7257309124Sdim return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 7258309124Sdim (__v16hi) __O, __M); 7259309124Sdim} 7260309124Sdim 7261341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 7262309124Sdim_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A) 7263309124Sdim{ 7264309124Sdim return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 7265309124Sdim (__v16hi) _mm256_setzero_si256 (), 7266309124Sdim __M); 7267309124Sdim} 7268309124Sdim 7269341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 7270309124Sdim_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A) 7271309124Sdim{ 7272309124Sdim __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M); 7273309124Sdim} 7274309124Sdim 7275341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7276309124Sdim_mm512_cvtepi64_epi8 (__m512i __A) 7277309124Sdim{ 7278309124Sdim return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 7279309124Sdim (__v16qi) _mm_undefined_si128 (), 7280309124Sdim (__mmask8) -1); 
7281309124Sdim} 7282309124Sdim 7283341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7284309124Sdim_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 7285309124Sdim{ 7286309124Sdim return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 7287309124Sdim (__v16qi) __O, __M); 7288309124Sdim} 7289309124Sdim 7290341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7291309124Sdim_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A) 7292309124Sdim{ 7293309124Sdim return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 7294309124Sdim (__v16qi) _mm_setzero_si128 (), 7295309124Sdim __M); 7296309124Sdim} 7297309124Sdim 7298341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 7299309124Sdim_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 7300309124Sdim{ 7301309124Sdim __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 7302309124Sdim} 7303309124Sdim 7304341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 7305309124Sdim_mm512_cvtepi64_epi32 (__m512i __A) 7306309124Sdim{ 7307309124Sdim return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 7308309124Sdim (__v8si) _mm256_undefined_si256 (), 7309309124Sdim (__mmask8) -1); 7310309124Sdim} 7311309124Sdim 7312341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 7313309124Sdim_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 7314309124Sdim{ 7315309124Sdim return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 7316309124Sdim (__v8si) __O, __M); 7317309124Sdim} 7318309124Sdim 7319341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 7320309124Sdim_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A) 7321309124Sdim{ 7322309124Sdim return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 7323309124Sdim (__v8si) _mm256_setzero_si256 (), 7324309124Sdim __M); 7325309124Sdim} 7326309124Sdim 7327341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 7328309124Sdim_mm512_mask_cvtepi64_storeu_epi32 (void* 
__P, __mmask8 __M, __m512i __A) 7329309124Sdim{ 7330309124Sdim __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 7331309124Sdim} 7332309124Sdim 7333341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7334309124Sdim_mm512_cvtepi64_epi16 (__m512i __A) 7335309124Sdim{ 7336309124Sdim return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 7337309124Sdim (__v8hi) _mm_undefined_si128 (), 7338309124Sdim (__mmask8) -1); 7339309124Sdim} 7340309124Sdim 7341341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7342309124Sdim_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 7343309124Sdim{ 7344309124Sdim return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 7345309124Sdim (__v8hi) __O, __M); 7346309124Sdim} 7347309124Sdim 7348341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512 7349309124Sdim_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A) 7350309124Sdim{ 7351309124Sdim return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 7352309124Sdim (__v8hi) _mm_setzero_si128 (), 7353309124Sdim __M); 7354309124Sdim} 7355309124Sdim 7356341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 7357309124Sdim_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) 7358309124Sdim{ 7359309124Sdim __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); 7360309124Sdim} 7361309124Sdim 7362341825Sdim#define _mm512_extracti32x4_epi32(A, imm) \ 7363341825Sdim (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 7364341825Sdim (__v4si)_mm_undefined_si128(), \ 7365341825Sdim (__mmask8)-1) 7366309124Sdim 7367341825Sdim#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \ 7368341825Sdim (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 7369341825Sdim (__v4si)(__m128i)(W), \ 7370341825Sdim (__mmask8)(U)) 7371309124Sdim 7372341825Sdim#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \ 7373341825Sdim 
(__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 7374341825Sdim (__v4si)_mm_setzero_si128(), \ 7375341825Sdim (__mmask8)(U)) 7376309124Sdim 7377341825Sdim#define _mm512_extracti64x4_epi64(A, imm) \ 7378341825Sdim (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 7379341825Sdim (__v4di)_mm256_undefined_si256(), \ 7380341825Sdim (__mmask8)-1) 7381309124Sdim 7382341825Sdim#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \ 7383341825Sdim (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 7384341825Sdim (__v4di)(__m256i)(W), \ 7385341825Sdim (__mmask8)(U)) 7386309124Sdim 7387341825Sdim#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \ 7388341825Sdim (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 7389341825Sdim (__v4di)_mm256_setzero_si256(), \ 7390341825Sdim (__mmask8)(U)) 7391309124Sdim 7392341825Sdim#define _mm512_insertf64x4(A, B, imm) \ 7393341825Sdim (__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \ 7394341825Sdim (__v4df)(__m256d)(B), (int)(imm)) 7395309124Sdim 7396341825Sdim#define _mm512_mask_insertf64x4(W, U, A, B, imm) \ 7397314564Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7398314564Sdim (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ 7399341825Sdim (__v8df)(__m512d)(W)) 7400309124Sdim 7401341825Sdim#define _mm512_maskz_insertf64x4(U, A, B, imm) \ 7402314564Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7403314564Sdim (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ 7404341825Sdim (__v8df)_mm512_setzero_pd()) 7405309124Sdim 7406341825Sdim#define _mm512_inserti64x4(A, B, imm) \ 7407341825Sdim (__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \ 7408341825Sdim (__v4di)(__m256i)(B), (int)(imm)) 7409309124Sdim 7410341825Sdim#define _mm512_mask_inserti64x4(W, U, A, B, imm) \ 7411314564Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 7412314564Sdim (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ 
7413341825Sdim (__v8di)(__m512i)(W)) 7414309124Sdim 7415341825Sdim#define _mm512_maskz_inserti64x4(U, A, B, imm) \ 7416314564Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 7417314564Sdim (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ 7418341825Sdim (__v8di)_mm512_setzero_si512()) 7419309124Sdim 7420341825Sdim#define _mm512_insertf32x4(A, B, imm) \ 7421341825Sdim (__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \ 7422341825Sdim (__v4sf)(__m128)(B), (int)(imm)) 7423309124Sdim 7424341825Sdim#define _mm512_mask_insertf32x4(W, U, A, B, imm) \ 7425314564Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 7426314564Sdim (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ 7427341825Sdim (__v16sf)(__m512)(W)) 7428309124Sdim 7429341825Sdim#define _mm512_maskz_insertf32x4(U, A, B, imm) \ 7430314564Sdim (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 7431314564Sdim (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ 7432341825Sdim (__v16sf)_mm512_setzero_ps()) 7433309124Sdim 7434341825Sdim#define _mm512_inserti32x4(A, B, imm) \ 7435341825Sdim (__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \ 7436341825Sdim (__v4si)(__m128i)(B), (int)(imm)) 7437309124Sdim 7438341825Sdim#define _mm512_mask_inserti32x4(W, U, A, B, imm) \ 7439314564Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 7440314564Sdim (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ 7441341825Sdim (__v16si)(__m512i)(W)) 7442309124Sdim 7443341825Sdim#define _mm512_maskz_inserti32x4(U, A, B, imm) \ 7444314564Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 7445314564Sdim (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ 7446341825Sdim (__v16si)_mm512_setzero_si512()) 7447309124Sdim 7448341825Sdim#define _mm512_getmant_round_pd(A, B, C, R) \ 7449309124Sdim (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7450309124Sdim (int)(((C)<<2) | (B)), \ 7451309124Sdim (__v8df)_mm512_undefined_pd(), \ 7452341825Sdim (__mmask8)-1, (int)(R)) 7453309124Sdim 
7454341825Sdim#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \ 7455309124Sdim (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7456309124Sdim (int)(((C)<<2) | (B)), \ 7457309124Sdim (__v8df)(__m512d)(W), \ 7458341825Sdim (__mmask8)(U), (int)(R)) 7459309124Sdim 7460341825Sdim#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \ 7461309124Sdim (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7462309124Sdim (int)(((C)<<2) | (B)), \ 7463309124Sdim (__v8df)_mm512_setzero_pd(), \ 7464341825Sdim (__mmask8)(U), (int)(R)) 7465309124Sdim 7466341825Sdim#define _mm512_getmant_pd(A, B, C) \ 7467309124Sdim (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7468309124Sdim (int)(((C)<<2) | (B)), \ 7469309124Sdim (__v8df)_mm512_setzero_pd(), \ 7470309124Sdim (__mmask8)-1, \ 7471341825Sdim _MM_FROUND_CUR_DIRECTION) 7472309124Sdim 7473341825Sdim#define _mm512_mask_getmant_pd(W, U, A, B, C) \ 7474309124Sdim (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7475309124Sdim (int)(((C)<<2) | (B)), \ 7476309124Sdim (__v8df)(__m512d)(W), \ 7477309124Sdim (__mmask8)(U), \ 7478341825Sdim _MM_FROUND_CUR_DIRECTION) 7479309124Sdim 7480341825Sdim#define _mm512_maskz_getmant_pd(U, A, B, C) \ 7481309124Sdim (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7482309124Sdim (int)(((C)<<2) | (B)), \ 7483309124Sdim (__v8df)_mm512_setzero_pd(), \ 7484309124Sdim (__mmask8)(U), \ 7485341825Sdim _MM_FROUND_CUR_DIRECTION) 7486309124Sdim 7487341825Sdim#define _mm512_getmant_round_ps(A, B, C, R) \ 7488309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7489309124Sdim (int)(((C)<<2) | (B)), \ 7490309124Sdim (__v16sf)_mm512_undefined_ps(), \ 7491341825Sdim (__mmask16)-1, (int)(R)) 7492309124Sdim 7493341825Sdim#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \ 7494309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7495309124Sdim (int)(((C)<<2) | (B)), \ 7496309124Sdim 
(__v16sf)(__m512)(W), \ 7497341825Sdim (__mmask16)(U), (int)(R)) 7498309124Sdim 7499341825Sdim#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \ 7500309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7501309124Sdim (int)(((C)<<2) | (B)), \ 7502309124Sdim (__v16sf)_mm512_setzero_ps(), \ 7503341825Sdim (__mmask16)(U), (int)(R)) 7504309124Sdim 7505341825Sdim#define _mm512_getmant_ps(A, B, C) \ 7506309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7507309124Sdim (int)(((C)<<2)|(B)), \ 7508309124Sdim (__v16sf)_mm512_undefined_ps(), \ 7509309124Sdim (__mmask16)-1, \ 7510341825Sdim _MM_FROUND_CUR_DIRECTION) 7511309124Sdim 7512341825Sdim#define _mm512_mask_getmant_ps(W, U, A, B, C) \ 7513309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7514309124Sdim (int)(((C)<<2)|(B)), \ 7515309124Sdim (__v16sf)(__m512)(W), \ 7516309124Sdim (__mmask16)(U), \ 7517341825Sdim _MM_FROUND_CUR_DIRECTION) 7518309124Sdim 7519341825Sdim#define _mm512_maskz_getmant_ps(U, A, B, C) \ 7520309124Sdim (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7521309124Sdim (int)(((C)<<2)|(B)), \ 7522309124Sdim (__v16sf)_mm512_setzero_ps(), \ 7523309124Sdim (__mmask16)(U), \ 7524341825Sdim _MM_FROUND_CUR_DIRECTION) 7525309124Sdim 7526341825Sdim#define _mm512_getexp_round_pd(A, R) \ 7527309124Sdim (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7528309124Sdim (__v8df)_mm512_undefined_pd(), \ 7529341825Sdim (__mmask8)-1, (int)(R)) 7530309124Sdim 7531341825Sdim#define _mm512_mask_getexp_round_pd(W, U, A, R) \ 7532309124Sdim (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7533309124Sdim (__v8df)(__m512d)(W), \ 7534341825Sdim (__mmask8)(U), (int)(R)) 7535309124Sdim 7536341825Sdim#define _mm512_maskz_getexp_round_pd(U, A, R) \ 7537309124Sdim (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7538309124Sdim (__v8df)_mm512_setzero_pd(), \ 7539341825Sdim (__mmask8)(U), 
(int)(R)) 7540309124Sdim 7541341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 7542309124Sdim_mm512_getexp_pd (__m512d __A) 7543309124Sdim{ 7544309124Sdim return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 7545309124Sdim (__v8df) _mm512_undefined_pd (), 7546309124Sdim (__mmask8) -1, 7547309124Sdim _MM_FROUND_CUR_DIRECTION); 7548309124Sdim} 7549309124Sdim 7550341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 7551309124Sdim_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A) 7552309124Sdim{ 7553309124Sdim return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 7554309124Sdim (__v8df) __W, 7555309124Sdim (__mmask8) __U, 7556309124Sdim _MM_FROUND_CUR_DIRECTION); 7557309124Sdim} 7558309124Sdim 7559341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 7560309124Sdim_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) 7561309124Sdim{ 7562309124Sdim return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 7563309124Sdim (__v8df) _mm512_setzero_pd (), 7564309124Sdim (__mmask8) __U, 7565309124Sdim _MM_FROUND_CUR_DIRECTION); 7566309124Sdim} 7567309124Sdim 7568341825Sdim#define _mm512_getexp_round_ps(A, R) \ 7569309124Sdim (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 7570309124Sdim (__v16sf)_mm512_undefined_ps(), \ 7571341825Sdim (__mmask16)-1, (int)(R)) 7572309124Sdim 7573341825Sdim#define _mm512_mask_getexp_round_ps(W, U, A, R) \ 7574309124Sdim (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 7575309124Sdim (__v16sf)(__m512)(W), \ 7576341825Sdim (__mmask16)(U), (int)(R)) 7577309124Sdim 7578341825Sdim#define _mm512_maskz_getexp_round_ps(U, A, R) \ 7579309124Sdim (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 7580309124Sdim (__v16sf)_mm512_setzero_ps(), \ 7581341825Sdim (__mmask16)(U), (int)(R)) 7582309124Sdim 7583341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 7584309124Sdim_mm512_getexp_ps (__m512 __A) 7585309124Sdim{ 7586309124Sdim return (__m512) 
__builtin_ia32_getexpps512_mask ((__v16sf) __A, 7587309124Sdim (__v16sf) _mm512_undefined_ps (), 7588309124Sdim (__mmask16) -1, 7589309124Sdim _MM_FROUND_CUR_DIRECTION); 7590309124Sdim} 7591309124Sdim 7592341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 7593309124Sdim_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A) 7594309124Sdim{ 7595309124Sdim return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 7596309124Sdim (__v16sf) __W, 7597309124Sdim (__mmask16) __U, 7598309124Sdim _MM_FROUND_CUR_DIRECTION); 7599309124Sdim} 7600309124Sdim 7601341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 7602309124Sdim_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) 7603309124Sdim{ 7604309124Sdim return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 7605309124Sdim (__v16sf) _mm512_setzero_ps (), 7606309124Sdim (__mmask16) __U, 7607309124Sdim _MM_FROUND_CUR_DIRECTION); 7608309124Sdim} 7609309124Sdim 7610341825Sdim#define _mm512_i64gather_ps(index, addr, scale) \ 7611309124Sdim (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \ 7612344779Sdim (void const *)(addr), \ 7613309124Sdim (__v8di)(__m512i)(index), (__mmask8)-1, \ 7614341825Sdim (int)(scale)) 7615309124Sdim 7616341825Sdim#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \ 7617321369Sdim (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\ 7618344779Sdim (void const *)(addr), \ 7619321369Sdim (__v8di)(__m512i)(index), \ 7620341825Sdim (__mmask8)(mask), (int)(scale)) 7621309124Sdim 7622341825Sdim#define _mm512_i64gather_epi32(index, addr, scale) \ 7623341825Sdim (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \ 7624344779Sdim (void const *)(addr), \ 7625309124Sdim (__v8di)(__m512i)(index), \ 7626341825Sdim (__mmask8)-1, (int)(scale)) 7627309124Sdim 7628341825Sdim#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \ 7629309124Sdim (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \ 
7630344779Sdim (void const *)(addr), \ 7631309124Sdim (__v8di)(__m512i)(index), \ 7632341825Sdim (__mmask8)(mask), (int)(scale)) 7633309124Sdim 7634341825Sdim#define _mm512_i64gather_pd(index, addr, scale) \ 7635309124Sdim (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \ 7636344779Sdim (void const *)(addr), \ 7637309124Sdim (__v8di)(__m512i)(index), (__mmask8)-1, \ 7638341825Sdim (int)(scale)) 7639309124Sdim 7640341825Sdim#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \ 7641309124Sdim (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \ 7642344779Sdim (void const *)(addr), \ 7643309124Sdim (__v8di)(__m512i)(index), \ 7644341825Sdim (__mmask8)(mask), (int)(scale)) 7645309124Sdim 7646341825Sdim#define _mm512_i64gather_epi64(index, addr, scale) \ 7647341825Sdim (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \ 7648344779Sdim (void const *)(addr), \ 7649309124Sdim (__v8di)(__m512i)(index), (__mmask8)-1, \ 7650341825Sdim (int)(scale)) 7651309124Sdim 7652341825Sdim#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \ 7653309124Sdim (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \ 7654344779Sdim (void const *)(addr), \ 7655309124Sdim (__v8di)(__m512i)(index), \ 7656341825Sdim (__mmask8)(mask), (int)(scale)) 7657309124Sdim 7658341825Sdim#define _mm512_i32gather_ps(index, addr, scale) \ 7659309124Sdim (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \ 7660344779Sdim (void const *)(addr), \ 7661360784Sdim (__v16si)(__m512)(index), \ 7662341825Sdim (__mmask16)-1, (int)(scale)) 7663309124Sdim 7664341825Sdim#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \ 7665309124Sdim (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \ 7666344779Sdim (void const *)(addr), \ 7667360784Sdim (__v16si)(__m512)(index), \ 7668341825Sdim (__mmask16)(mask), (int)(scale)) 7669309124Sdim 7670341825Sdim#define _mm512_i32gather_epi32(index, 
addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale)))

/* 32-bit-index gathers.  The 16-lane forms take a 512-bit index (__v16si);
 * the 8-lane pd/epi64 forms take a 256-bit index (__v8si).  Unmasked forms
 * pass an undefined first operand and an all-ones mask; _mask_ forms
 * forward v1_old and mask.  Expansions are parenthesized so the leading
 * cast covers the whole call under any following postfix operator. */
#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

/* 64-bit-index scatter of a 256-bit float vector; all-ones mask. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), \
(int)(scale))

/* Scatters addressed through a vector of 64-bit indices (__v8di).
 * Each macro is a direct call to the matching scatterdiv* builtin:
 * unmasked forms pass (__mmask8)-1, _mask_ forms pass the caller's mask.
 * The ps/epi32 forms store a 256-bit value vector, the pd/epi64 forms a
 * 512-bit one. */
#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), \
                                (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), \
                                (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), \
                                (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), \
                                (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), \
                                (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), \
                                (int)(scale))

#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), \
                               (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), \
                               (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), \
                               (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), \
                               (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), \
                               (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), \
                               (int)(scale))

#define _mm512_i32scatter_ps(addr, index, v1, scale) \
__builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

/* Scatters addressed through a vector of 32-bit indices.
 * Each macro is a direct call to the matching scattersiv* builtin:
 * unmasked forms pass an all-ones mask, _mask_ forms pass the caller's
 * mask.  The ps/epi32 forms use a 512-bit index (__v16si) and a 16-bit
 * mask; the pd/epi64 forms use a 256-bit index (__v8si) and an 8-bit mask. */
#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), \
                                (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), \
                                (int)(scale))

#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), \
                                (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), \
                                (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), \
                                (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), \
                                (int)(scale))

#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), \
                               (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), \
                               (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), \
                               (int)(scale))

#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), \
                               (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), \
                               (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
(__v8di)(__m512i)(v1), (int)(scale)) 7785309124Sdim 7786341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 7787309124Sdim_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 7788309124Sdim{ 7789341825Sdim return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, 7790341825Sdim (__v4sf)__A, 7791341825Sdim (__v4sf)__B, 7792341825Sdim (__mmask8)__U, 7793341825Sdim _MM_FROUND_CUR_DIRECTION); 7794309124Sdim} 7795309124Sdim 7796341825Sdim#define _mm_fmadd_round_ss(A, B, C, R) \ 7797341825Sdim (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 7798341825Sdim (__v4sf)(__m128)(B), \ 7799341825Sdim (__v4sf)(__m128)(C), (__mmask8)-1, \ 7800341825Sdim (int)(R)) 7801341825Sdim 7802341825Sdim#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \ 7803314564Sdim (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 7804314564Sdim (__v4sf)(__m128)(A), \ 7805314564Sdim (__v4sf)(__m128)(B), (__mmask8)(U), \ 7806341825Sdim (int)(R)) 7807309124Sdim 7808341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 7809309124Sdim_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 7810309124Sdim{ 7811341825Sdim return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, 7812341825Sdim (__v4sf)__B, 7813341825Sdim (__v4sf)__C, 7814341825Sdim (__mmask8)__U, 7815341825Sdim _MM_FROUND_CUR_DIRECTION); 7816309124Sdim} 7817309124Sdim 7818341825Sdim#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \ 7819309124Sdim (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 7820309124Sdim (__v4sf)(__m128)(B), \ 7821309124Sdim (__v4sf)(__m128)(C), (__mmask8)(U), \ 7822341825Sdim (int)(R)) 7823309124Sdim 7824341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 7825309124Sdim_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 7826309124Sdim{ 7827341825Sdim return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W, 7828341825Sdim (__v4sf)__X, 7829341825Sdim (__v4sf)__Y, 7830341825Sdim (__mmask8)__U, 7831341825Sdim _MM_FROUND_CUR_DIRECTION); 7832309124Sdim} 
7833309124Sdim 7834341825Sdim#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \ 7835309124Sdim (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ 7836309124Sdim (__v4sf)(__m128)(X), \ 7837309124Sdim (__v4sf)(__m128)(Y), (__mmask8)(U), \ 7838341825Sdim (int)(R)) 7839309124Sdim 7840341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 7841309124Sdim_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 7842309124Sdim{ 7843341825Sdim return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, 7844341825Sdim (__v4sf)__A, 7845341825Sdim -(__v4sf)__B, 7846341825Sdim (__mmask8)__U, 7847341825Sdim _MM_FROUND_CUR_DIRECTION); 7848309124Sdim} 7849309124Sdim 7850341825Sdim#define _mm_fmsub_round_ss(A, B, C, R) \ 7851341825Sdim (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 7852341825Sdim (__v4sf)(__m128)(B), \ 7853341825Sdim -(__v4sf)(__m128)(C), (__mmask8)-1, \ 7854341825Sdim (int)(R)) 7855341825Sdim 7856341825Sdim#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \ 7857314564Sdim (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 7858314564Sdim (__v4sf)(__m128)(A), \ 7859341825Sdim -(__v4sf)(__m128)(B), (__mmask8)(U), \ 7860341825Sdim (int)(R)) 7861309124Sdim 7862341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 7863309124Sdim_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 7864309124Sdim{ 7865341825Sdim return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, 7866341825Sdim (__v4sf)__B, 7867341825Sdim -(__v4sf)__C, 7868341825Sdim (__mmask8)__U, 7869341825Sdim _MM_FROUND_CUR_DIRECTION); 7870309124Sdim} 7871309124Sdim 7872341825Sdim#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \ 7873309124Sdim (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 7874309124Sdim (__v4sf)(__m128)(B), \ 7875309124Sdim -(__v4sf)(__m128)(C), (__mmask8)(U), \ 7876341825Sdim (int)(R)) 7877309124Sdim 7878341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 7879309124Sdim_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, 
__mmask8 __U) 7880309124Sdim{ 7881341825Sdim return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W, 7882341825Sdim (__v4sf)__X, 7883341825Sdim (__v4sf)__Y, 7884341825Sdim (__mmask8)__U, 7885341825Sdim _MM_FROUND_CUR_DIRECTION); 7886309124Sdim} 7887309124Sdim 7888341825Sdim#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \ 7889314564Sdim (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ 7890309124Sdim (__v4sf)(__m128)(X), \ 7891314564Sdim (__v4sf)(__m128)(Y), (__mmask8)(U), \ 7892341825Sdim (int)(R)) 7893309124Sdim 7894341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 7895309124Sdim_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 7896309124Sdim{ 7897341825Sdim return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, 7898341825Sdim -(__v4sf)__A, 7899341825Sdim (__v4sf)__B, 7900341825Sdim (__mmask8)__U, 7901341825Sdim _MM_FROUND_CUR_DIRECTION); 7902309124Sdim} 7903309124Sdim 7904341825Sdim#define _mm_fnmadd_round_ss(A, B, C, R) \ 7905341825Sdim (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 7906341825Sdim -(__v4sf)(__m128)(B), \ 7907341825Sdim (__v4sf)(__m128)(C), (__mmask8)-1, \ 7908341825Sdim (int)(R)) 7909341825Sdim 7910341825Sdim#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \ 7911314564Sdim (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 7912314564Sdim -(__v4sf)(__m128)(A), \ 7913314564Sdim (__v4sf)(__m128)(B), (__mmask8)(U), \ 7914341825Sdim (int)(R)) 7915309124Sdim 7916341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 7917309124Sdim_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 7918309124Sdim{ 7919341825Sdim return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, 7920341825Sdim -(__v4sf)__B, 7921341825Sdim (__v4sf)__C, 7922341825Sdim (__mmask8)__U, 7923341825Sdim _MM_FROUND_CUR_DIRECTION); 7924309124Sdim} 7925309124Sdim 7926341825Sdim#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \ 7927341825Sdim (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 7928341825Sdim 
-(__v4sf)(__m128)(B), \ 7929309124Sdim (__v4sf)(__m128)(C), (__mmask8)(U), \ 7930341825Sdim (int)(R)) 7931309124Sdim 7932341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 7933309124Sdim_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 7934309124Sdim{ 7935341825Sdim return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W, 7936341825Sdim -(__v4sf)__X, 7937341825Sdim (__v4sf)__Y, 7938341825Sdim (__mmask8)__U, 7939341825Sdim _MM_FROUND_CUR_DIRECTION); 7940309124Sdim} 7941309124Sdim 7942341825Sdim#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \ 7943341825Sdim (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ 7944341825Sdim -(__v4sf)(__m128)(X), \ 7945309124Sdim (__v4sf)(__m128)(Y), (__mmask8)(U), \ 7946341825Sdim (int)(R)) 7947309124Sdim 7948341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 7949309124Sdim_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 7950309124Sdim{ 7951341825Sdim return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, 7952341825Sdim -(__v4sf)__A, 7953341825Sdim -(__v4sf)__B, 7954341825Sdim (__mmask8)__U, 7955341825Sdim _MM_FROUND_CUR_DIRECTION); 7956309124Sdim} 7957309124Sdim 7958341825Sdim#define _mm_fnmsub_round_ss(A, B, C, R) \ 7959341825Sdim (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 7960341825Sdim -(__v4sf)(__m128)(B), \ 7961341825Sdim -(__v4sf)(__m128)(C), (__mmask8)-1, \ 7962341825Sdim (int)(R)) 7963341825Sdim 7964341825Sdim#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \ 7965314564Sdim (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 7966314564Sdim -(__v4sf)(__m128)(A), \ 7967314564Sdim -(__v4sf)(__m128)(B), (__mmask8)(U), \ 7968341825Sdim (int)(R)) 7969309124Sdim 7970341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 7971309124Sdim_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 7972309124Sdim{ 7973341825Sdim return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, 7974341825Sdim -(__v4sf)__B, 7975341825Sdim -(__v4sf)__C, 
7976341825Sdim (__mmask8)__U, 7977341825Sdim _MM_FROUND_CUR_DIRECTION); 7978309124Sdim} 7979309124Sdim 7980341825Sdim#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \ 7981341825Sdim (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 7982341825Sdim -(__v4sf)(__m128)(B), \ 7983309124Sdim -(__v4sf)(__m128)(C), (__mmask8)(U), \ 7984341825Sdim (int)(R)) 7985309124Sdim 7986341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 7987309124Sdim_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 7988309124Sdim{ 7989341825Sdim return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W, 7990341825Sdim -(__v4sf)__X, 7991341825Sdim (__v4sf)__Y, 7992341825Sdim (__mmask8)__U, 7993341825Sdim _MM_FROUND_CUR_DIRECTION); 7994309124Sdim} 7995309124Sdim 7996341825Sdim#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \ 7997341825Sdim (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ 7998341825Sdim -(__v4sf)(__m128)(X), \ 7999314564Sdim (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8000341825Sdim (int)(R)) 8001309124Sdim 8002341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 8003309124Sdim_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8004309124Sdim{ 8005341825Sdim return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, 8006341825Sdim (__v2df)__A, 8007341825Sdim (__v2df)__B, 8008341825Sdim (__mmask8)__U, 8009341825Sdim _MM_FROUND_CUR_DIRECTION); 8010309124Sdim} 8011309124Sdim 8012341825Sdim#define _mm_fmadd_round_sd(A, B, C, R) \ 8013341825Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 8014341825Sdim (__v2df)(__m128d)(B), \ 8015341825Sdim (__v2df)(__m128d)(C), (__mmask8)-1, \ 8016341825Sdim (int)(R)) 8017341825Sdim 8018341825Sdim#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \ 8019314564Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8020314564Sdim (__v2df)(__m128d)(A), \ 8021314564Sdim (__v2df)(__m128d)(B), (__mmask8)(U), \ 8022341825Sdim (int)(R)) 8023309124Sdim 8024341825Sdimstatic __inline__ 
__m128d __DEFAULT_FN_ATTRS128 8025309124Sdim_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8026309124Sdim{ 8027341825Sdim return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, 8028341825Sdim (__v2df)__B, 8029341825Sdim (__v2df)__C, 8030341825Sdim (__mmask8)__U, 8031341825Sdim _MM_FROUND_CUR_DIRECTION); 8032309124Sdim} 8033309124Sdim 8034341825Sdim#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \ 8035309124Sdim (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8036309124Sdim (__v2df)(__m128d)(B), \ 8037309124Sdim (__v2df)(__m128d)(C), (__mmask8)(U), \ 8038341825Sdim (int)(R)) 8039309124Sdim 8040341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 8041309124Sdim_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8042309124Sdim{ 8043341825Sdim return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W, 8044341825Sdim (__v2df)__X, 8045341825Sdim (__v2df)__Y, 8046341825Sdim (__mmask8)__U, 8047341825Sdim _MM_FROUND_CUR_DIRECTION); 8048309124Sdim} 8049309124Sdim 8050341825Sdim#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \ 8051309124Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ 8052309124Sdim (__v2df)(__m128d)(X), \ 8053309124Sdim (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8054341825Sdim (int)(R)) 8055309124Sdim 8056341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 8057309124Sdim_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8058309124Sdim{ 8059341825Sdim return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, 8060341825Sdim (__v2df)__A, 8061341825Sdim -(__v2df)__B, 8062341825Sdim (__mmask8)__U, 8063341825Sdim _MM_FROUND_CUR_DIRECTION); 8064309124Sdim} 8065309124Sdim 8066341825Sdim#define _mm_fmsub_round_sd(A, B, C, R) \ 8067341825Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 8068341825Sdim (__v2df)(__m128d)(B), \ 8069341825Sdim -(__v2df)(__m128d)(C), (__mmask8)-1, \ 8070341825Sdim (int)(R)) 8071341825Sdim 8072341825Sdim#define 
_mm_mask_fmsub_round_sd(W, U, A, B, R) \ 8073314564Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8074314564Sdim (__v2df)(__m128d)(A), \ 8075314564Sdim -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8076341825Sdim (int)(R)) 8077309124Sdim 8078341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 8079309124Sdim_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8080309124Sdim{ 8081341825Sdim return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, 8082341825Sdim (__v2df)__B, 8083341825Sdim -(__v2df)__C, 8084341825Sdim (__mmask8)__U, 8085341825Sdim _MM_FROUND_CUR_DIRECTION); 8086309124Sdim} 8087309124Sdim 8088341825Sdim#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \ 8089309124Sdim (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8090309124Sdim (__v2df)(__m128d)(B), \ 8091309124Sdim -(__v2df)(__m128d)(C), \ 8092341825Sdim (__mmask8)(U), (int)(R)) 8093309124Sdim 8094341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 8095309124Sdim_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8096309124Sdim{ 8097341825Sdim return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W, 8098341825Sdim (__v2df)__X, 8099341825Sdim (__v2df)__Y, 8100341825Sdim (__mmask8)__U, 8101341825Sdim _MM_FROUND_CUR_DIRECTION); 8102309124Sdim} 8103309124Sdim 8104341825Sdim#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \ 8105314564Sdim (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ 8106309124Sdim (__v2df)(__m128d)(X), \ 8107314564Sdim (__v2df)(__m128d)(Y), \ 8108341825Sdim (__mmask8)(U), (int)(R)) 8109309124Sdim 8110341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 8111309124Sdim_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8112309124Sdim{ 8113341825Sdim return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, 8114341825Sdim -(__v2df)__A, 8115341825Sdim (__v2df)__B, 8116341825Sdim (__mmask8)__U, 8117341825Sdim _MM_FROUND_CUR_DIRECTION); 8118309124Sdim} 8119309124Sdim 8120341825Sdim#define 
_mm_fnmadd_round_sd(A, B, C, R) \ 8121341825Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 8122341825Sdim -(__v2df)(__m128d)(B), \ 8123341825Sdim (__v2df)(__m128d)(C), (__mmask8)-1, \ 8124341825Sdim (int)(R)) 8125341825Sdim 8126341825Sdim#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \ 8127314564Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8128314564Sdim -(__v2df)(__m128d)(A), \ 8129314564Sdim (__v2df)(__m128d)(B), (__mmask8)(U), \ 8130341825Sdim (int)(R)) 8131309124Sdim 8132341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 8133309124Sdim_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8134309124Sdim{ 8135341825Sdim return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, 8136341825Sdim -(__v2df)__B, 8137341825Sdim (__v2df)__C, 8138341825Sdim (__mmask8)__U, 8139341825Sdim _MM_FROUND_CUR_DIRECTION); 8140309124Sdim} 8141309124Sdim 8142341825Sdim#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \ 8143341825Sdim (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8144341825Sdim -(__v2df)(__m128d)(B), \ 8145309124Sdim (__v2df)(__m128d)(C), (__mmask8)(U), \ 8146341825Sdim (int)(R)) 8147309124Sdim 8148341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 8149309124Sdim_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8150309124Sdim{ 8151341825Sdim return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W, 8152341825Sdim -(__v2df)__X, 8153341825Sdim (__v2df)__Y, 8154341825Sdim (__mmask8)__U, 8155341825Sdim _MM_FROUND_CUR_DIRECTION); 8156309124Sdim} 8157309124Sdim 8158341825Sdim#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \ 8159341825Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ 8160341825Sdim -(__v2df)(__m128d)(X), \ 8161309124Sdim (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8162341825Sdim (int)(R)) 8163309124Sdim 8164341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 8165309124Sdim_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, 
__m128d __B) 8166309124Sdim{ 8167341825Sdim return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, 8168341825Sdim -(__v2df)__A, 8169341825Sdim -(__v2df)__B, 8170341825Sdim (__mmask8)__U, 8171341825Sdim _MM_FROUND_CUR_DIRECTION); 8172309124Sdim} 8173309124Sdim 8174341825Sdim#define _mm_fnmsub_round_sd(A, B, C, R) \ 8175341825Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 8176341825Sdim -(__v2df)(__m128d)(B), \ 8177341825Sdim -(__v2df)(__m128d)(C), (__mmask8)-1, \ 8178341825Sdim (int)(R)) 8179341825Sdim 8180341825Sdim#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \ 8181314564Sdim (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8182314564Sdim -(__v2df)(__m128d)(A), \ 8183314564Sdim -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8184341825Sdim (int)(R)) 8185309124Sdim 8186341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 8187309124Sdim_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8188309124Sdim{ 8189341825Sdim return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, 8190341825Sdim -(__v2df)__B, 8191341825Sdim -(__v2df)__C, 8192341825Sdim (__mmask8)__U, 8193341825Sdim _MM_FROUND_CUR_DIRECTION); 8194309124Sdim} 8195309124Sdim 8196341825Sdim#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \ 8197341825Sdim (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8198341825Sdim -(__v2df)(__m128d)(B), \ 8199309124Sdim -(__v2df)(__m128d)(C), \ 8200309124Sdim (__mmask8)(U), \ 8201341825Sdim (int)(R)) 8202309124Sdim 8203341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 8204309124Sdim_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8205309124Sdim{ 8206341825Sdim return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W, 8207341825Sdim -(__v2df)__X, 8208341825Sdim (__v2df)__Y, 8209341825Sdim (__mmask8)__U, 8210341825Sdim _MM_FROUND_CUR_DIRECTION); 8211309124Sdim} 8212309124Sdim 8213341825Sdim#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \ 8214341825Sdim 
(__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ 8215341825Sdim -(__v2df)(__m128d)(X), \ 8216314564Sdim (__v2df)(__m128d)(Y), \ 8217341825Sdim (__mmask8)(U), (int)(R)) 8218309124Sdim 8219341825Sdim#define _mm512_permutex_pd(X, C) \ 8220341825Sdim (__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)) 8221309124Sdim 8222341825Sdim#define _mm512_mask_permutex_pd(W, U, X, C) \ 8223309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 8224309124Sdim (__v8df)_mm512_permutex_pd((X), (C)), \ 8225341825Sdim (__v8df)(__m512d)(W)) 8226309124Sdim 8227341825Sdim#define _mm512_maskz_permutex_pd(U, X, C) \ 8228309124Sdim (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 8229309124Sdim (__v8df)_mm512_permutex_pd((X), (C)), \ 8230341825Sdim (__v8df)_mm512_setzero_pd()) 8231309124Sdim 8232341825Sdim#define _mm512_permutex_epi64(X, C) \ 8233341825Sdim (__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)) 8234309124Sdim 8235341825Sdim#define _mm512_mask_permutex_epi64(W, U, X, C) \ 8236309124Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 8237309124Sdim (__v8di)_mm512_permutex_epi64((X), (C)), \ 8238341825Sdim (__v8di)(__m512i)(W)) 8239309124Sdim 8240341825Sdim#define _mm512_maskz_permutex_epi64(U, X, C) \ 8241309124Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 8242309124Sdim (__v8di)_mm512_permutex_epi64((X), (C)), \ 8243341825Sdim (__v8di)_mm512_setzero_si512()) 8244309124Sdim 8245341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 8246309124Sdim_mm512_permutexvar_pd (__m512i __X, __m512d __Y) 8247309124Sdim{ 8248341825Sdim return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X); 8249309124Sdim} 8250309124Sdim 8251341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 8252309124Sdim_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) 8253309124Sdim{ 8254341825Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 8255341825Sdim 
(__v8df)_mm512_permutexvar_pd(__X, __Y), 8256341825Sdim (__v8df)__W); 8257309124Sdim} 8258309124Sdim 8259341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 8260309124Sdim_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y) 8261309124Sdim{ 8262341825Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 8263341825Sdim (__v8df)_mm512_permutexvar_pd(__X, __Y), 8264341825Sdim (__v8df)_mm512_setzero_pd()); 8265309124Sdim} 8266309124Sdim 8267341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8268341825Sdim_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) 8269309124Sdim{ 8270341825Sdim return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X); 8271309124Sdim} 8272309124Sdim 8273341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8274341825Sdim_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) 8275309124Sdim{ 8276341825Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 8277341825Sdim (__v8di)_mm512_permutexvar_epi64(__X, __Y), 8278341825Sdim (__v8di)_mm512_setzero_si512()); 8279309124Sdim} 8280309124Sdim 8281341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8282309124Sdim_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, 8283309124Sdim __m512i __Y) 8284309124Sdim{ 8285341825Sdim return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 8286341825Sdim (__v8di)_mm512_permutexvar_epi64(__X, __Y), 8287341825Sdim (__v8di)__W); 8288309124Sdim} 8289309124Sdim 8290341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 8291309124Sdim_mm512_permutexvar_ps (__m512i __X, __m512 __Y) 8292309124Sdim{ 8293341825Sdim return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X); 8294309124Sdim} 8295309124Sdim 8296341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 8297309124Sdim_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) 8298309124Sdim{ 8299341825Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 
8300341825Sdim (__v16sf)_mm512_permutexvar_ps(__X, __Y), 8301341825Sdim (__v16sf)__W); 8302309124Sdim} 8303309124Sdim 8304341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 8305309124Sdim_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y) 8306309124Sdim{ 8307341825Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 8308341825Sdim (__v16sf)_mm512_permutexvar_ps(__X, __Y), 8309341825Sdim (__v16sf)_mm512_setzero_ps()); 8310309124Sdim} 8311309124Sdim 8312341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8313341825Sdim_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) 8314309124Sdim{ 8315341825Sdim return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X); 8316309124Sdim} 8317309124Sdim 8318341825Sdim#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32 8319341825Sdim 8320341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8321341825Sdim_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) 8322309124Sdim{ 8323341825Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 8324341825Sdim (__v16si)_mm512_permutexvar_epi32(__X, __Y), 8325341825Sdim (__v16si)_mm512_setzero_si512()); 8326309124Sdim} 8327309124Sdim 8328341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8329309124Sdim_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, 8330309124Sdim __m512i __Y) 8331309124Sdim{ 8332341825Sdim return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 8333341825Sdim (__v16si)_mm512_permutexvar_epi32(__X, __Y), 8334341825Sdim (__v16si)__W); 8335309124Sdim} 8336309124Sdim 8337321369Sdim#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32 8338321369Sdim 8339344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 8340309124Sdim_mm512_kand (__mmask16 __A, __mmask16 __B) 8341309124Sdim{ 8342309124Sdim return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); 8343309124Sdim} 8344309124Sdim 8345344779Sdimstatic __inline__ __mmask16 
__DEFAULT_FN_ATTRS 8346309124Sdim_mm512_kandn (__mmask16 __A, __mmask16 __B) 8347309124Sdim{ 8348309124Sdim return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); 8349309124Sdim} 8350309124Sdim 8351344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 8352309124Sdim_mm512_kor (__mmask16 __A, __mmask16 __B) 8353309124Sdim{ 8354309124Sdim return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B); 8355309124Sdim} 8356309124Sdim 8357344779Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 8358309124Sdim_mm512_kortestc (__mmask16 __A, __mmask16 __B) 8359309124Sdim{ 8360309124Sdim return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B); 8361309124Sdim} 8362309124Sdim 8363344779Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 8364309124Sdim_mm512_kortestz (__mmask16 __A, __mmask16 __B) 8365309124Sdim{ 8366309124Sdim return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B); 8367309124Sdim} 8368309124Sdim 8369344779Sdimstatic __inline__ unsigned char __DEFAULT_FN_ATTRS 8370344779Sdim_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B) 8371344779Sdim{ 8372344779Sdim return (unsigned char)__builtin_ia32_kortestchi(__A, __B); 8373344779Sdim} 8374344779Sdim 8375344779Sdimstatic __inline__ unsigned char __DEFAULT_FN_ATTRS 8376344779Sdim_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B) 8377344779Sdim{ 8378344779Sdim return (unsigned char)__builtin_ia32_kortestzhi(__A, __B); 8379344779Sdim} 8380344779Sdim 8381344779Sdimstatic __inline__ unsigned char __DEFAULT_FN_ATTRS 8382344779Sdim_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) { 8383344779Sdim *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B); 8384344779Sdim return (unsigned char)__builtin_ia32_kortestzhi(__A, __B); 8385344779Sdim} 8386344779Sdim 8387344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 8388309124Sdim_mm512_kunpackb (__mmask16 __A, __mmask16 __B) 8389309124Sdim{ 8390329410Sdim return (__mmask16) __builtin_ia32_kunpckhi 
((__mmask16) __A, (__mmask16) __B); 8391309124Sdim} 8392309124Sdim 8393344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 8394309124Sdim_mm512_kxnor (__mmask16 __A, __mmask16 __B) 8395309124Sdim{ 8396309124Sdim return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); 8397309124Sdim} 8398309124Sdim 8399344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 8400309124Sdim_mm512_kxor (__mmask16 __A, __mmask16 __B) 8401309124Sdim{ 8402309124Sdim return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); 8403309124Sdim} 8404309124Sdim 8405344779Sdim#define _kand_mask16 _mm512_kand 8406344779Sdim#define _kandn_mask16 _mm512_kandn 8407344779Sdim#define _knot_mask16 _mm512_knot 8408344779Sdim#define _kor_mask16 _mm512_kor 8409344779Sdim#define _kxnor_mask16 _mm512_kxnor 8410344779Sdim#define _kxor_mask16 _mm512_kxor 8411344779Sdim 8412344779Sdim#define _kshiftli_mask16(A, I) \ 8413344779Sdim (__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)) 8414344779Sdim 8415344779Sdim#define _kshiftri_mask16(A, I) \ 8416344779Sdim (__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)) 8417344779Sdim 8418344779Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 8419344779Sdim_cvtmask16_u32(__mmask16 __A) { 8420344779Sdim return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A); 8421344779Sdim} 8422344779Sdim 8423344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 8424344779Sdim_cvtu32_mask16(unsigned int __A) { 8425344779Sdim return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A); 8426344779Sdim} 8427344779Sdim 8428344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS 8429344779Sdim_load_mask16(__mmask16 *__A) { 8430344779Sdim return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A); 8431344779Sdim} 8432344779Sdim 8433344779Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 8434344779Sdim_store_mask16(__mmask16 *__A, __mmask16 __B) { 8435344779Sdim *(__mmask16 *)__A = 
__builtin_ia32_kmovw((__mmask16)__B); 8436344779Sdim} 8437344779Sdim 8438341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 8439360784Sdim_mm512_stream_si512 (void * __P, __m512i __A) 8440309124Sdim{ 8441322320Sdim typedef __v8di __v8di_aligned __attribute__((aligned(64))); 8442322320Sdim __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P); 8443309124Sdim} 8444309124Sdim 8445341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8446327952Sdim_mm512_stream_load_si512 (void const *__P) 8447309124Sdim{ 8448322320Sdim typedef __v8di __v8di_aligned __attribute__((aligned(64))); 8449322320Sdim return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P); 8450309124Sdim} 8451309124Sdim 8452341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 8453360784Sdim_mm512_stream_pd (void *__P, __m512d __A) 8454309124Sdim{ 8455322320Sdim typedef __v8df __v8df_aligned __attribute__((aligned(64))); 8456322320Sdim __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P); 8457309124Sdim} 8458309124Sdim 8459341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 8460360784Sdim_mm512_stream_ps (void *__P, __m512 __A) 8461309124Sdim{ 8462322320Sdim typedef __v16sf __v16sf_aligned __attribute__((aligned(64))); 8463322320Sdim __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P); 8464309124Sdim} 8465309124Sdim 8466341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 8467309124Sdim_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) 8468309124Sdim{ 8469309124Sdim return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 8470309124Sdim (__v8df) __W, 8471309124Sdim (__mmask8) __U); 8472309124Sdim} 8473309124Sdim 8474341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 8475309124Sdim_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) 8476309124Sdim{ 8477309124Sdim return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 8478309124Sdim (__v8df) 8479309124Sdim _mm512_setzero_pd (), 
8480309124Sdim (__mmask8) __U); 8481309124Sdim} 8482309124Sdim 8483341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8484309124Sdim_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 8485309124Sdim{ 8486309124Sdim return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 8487309124Sdim (__v8di) __W, 8488309124Sdim (__mmask8) __U); 8489309124Sdim} 8490309124Sdim 8491341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8492309124Sdim_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) 8493309124Sdim{ 8494309124Sdim return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 8495309124Sdim (__v8di) 8496309124Sdim _mm512_setzero_si512 (), 8497309124Sdim (__mmask8) __U); 8498309124Sdim} 8499309124Sdim 8500341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 8501309124Sdim_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) 8502309124Sdim{ 8503309124Sdim return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 8504309124Sdim (__v16sf) __W, 8505309124Sdim (__mmask16) __U); 8506309124Sdim} 8507309124Sdim 8508341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 8509309124Sdim_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) 8510309124Sdim{ 8511309124Sdim return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 8512309124Sdim (__v16sf) 8513309124Sdim _mm512_setzero_ps (), 8514309124Sdim (__mmask16) __U); 8515309124Sdim} 8516309124Sdim 8517341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8518309124Sdim_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 8519309124Sdim{ 8520309124Sdim return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 8521309124Sdim (__v16si) __W, 8522309124Sdim (__mmask16) __U); 8523309124Sdim} 8524309124Sdim 8525341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8526309124Sdim_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) 8527309124Sdim{ 8528309124Sdim return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) 
__A, 8529309124Sdim (__v16si) 8530309124Sdim _mm512_setzero_si512 (), 8531309124Sdim (__mmask16) __U); 8532309124Sdim} 8533309124Sdim 8534341825Sdim#define _mm_cmp_round_ss_mask(X, Y, P, R) \ 8535309124Sdim (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8536309124Sdim (__v4sf)(__m128)(Y), (int)(P), \ 8537341825Sdim (__mmask8)-1, (int)(R)) 8538309124Sdim 8539341825Sdim#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \ 8540309124Sdim (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8541309124Sdim (__v4sf)(__m128)(Y), (int)(P), \ 8542341825Sdim (__mmask8)(M), (int)(R)) 8543309124Sdim 8544341825Sdim#define _mm_cmp_ss_mask(X, Y, P) \ 8545309124Sdim (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8546309124Sdim (__v4sf)(__m128)(Y), (int)(P), \ 8547309124Sdim (__mmask8)-1, \ 8548341825Sdim _MM_FROUND_CUR_DIRECTION) 8549309124Sdim 8550341825Sdim#define _mm_mask_cmp_ss_mask(M, X, Y, P) \ 8551309124Sdim (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8552309124Sdim (__v4sf)(__m128)(Y), (int)(P), \ 8553309124Sdim (__mmask8)(M), \ 8554341825Sdim _MM_FROUND_CUR_DIRECTION) 8555309124Sdim 8556341825Sdim#define _mm_cmp_round_sd_mask(X, Y, P, R) \ 8557309124Sdim (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8558309124Sdim (__v2df)(__m128d)(Y), (int)(P), \ 8559341825Sdim (__mmask8)-1, (int)(R)) 8560309124Sdim 8561341825Sdim#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \ 8562309124Sdim (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8563309124Sdim (__v2df)(__m128d)(Y), (int)(P), \ 8564341825Sdim (__mmask8)(M), (int)(R)) 8565309124Sdim 8566341825Sdim#define _mm_cmp_sd_mask(X, Y, P) \ 8567309124Sdim (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8568309124Sdim (__v2df)(__m128d)(Y), (int)(P), \ 8569309124Sdim (__mmask8)-1, \ 8570341825Sdim _MM_FROUND_CUR_DIRECTION) 8571309124Sdim 8572341825Sdim#define _mm_mask_cmp_sd_mask(M, X, Y, P) \ 8573309124Sdim 
(__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8574309124Sdim (__v2df)(__m128d)(Y), (int)(P), \ 8575309124Sdim (__mmask8)(M), \ 8576341825Sdim _MM_FROUND_CUR_DIRECTION) 8577309124Sdim 8578327952Sdim/* Bit Test */ 8579327952Sdim 8580341825Sdimstatic __inline __mmask16 __DEFAULT_FN_ATTRS512 8581327952Sdim_mm512_test_epi32_mask (__m512i __A, __m512i __B) 8582327952Sdim{ 8583327952Sdim return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B), 8584341825Sdim _mm512_setzero_si512()); 8585327952Sdim} 8586327952Sdim 8587341825Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS512 8588327952Sdim_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 8589327952Sdim{ 8590327952Sdim return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B), 8591341825Sdim _mm512_setzero_si512()); 8592327952Sdim} 8593327952Sdim 8594341825Sdimstatic __inline __mmask8 __DEFAULT_FN_ATTRS512 8595327952Sdim_mm512_test_epi64_mask (__m512i __A, __m512i __B) 8596327952Sdim{ 8597327952Sdim return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B), 8598341825Sdim _mm512_setzero_si512()); 8599327952Sdim} 8600327952Sdim 8601341825Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS512 8602327952Sdim_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 8603327952Sdim{ 8604327952Sdim return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B), 8605341825Sdim _mm512_setzero_si512()); 8606327952Sdim} 8607327952Sdim 8608341825Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS512 8609327952Sdim_mm512_testn_epi32_mask (__m512i __A, __m512i __B) 8610327952Sdim{ 8611327952Sdim return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B), 8612341825Sdim _mm512_setzero_si512()); 8613327952Sdim} 8614327952Sdim 8615341825Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS512 8616327952Sdim_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 8617327952Sdim{ 8618327952Sdim return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, 
__B), 8619341825Sdim _mm512_setzero_si512()); 8620327952Sdim} 8621327952Sdim 8622341825Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS512 8623327952Sdim_mm512_testn_epi64_mask (__m512i __A, __m512i __B) 8624327952Sdim{ 8625327952Sdim return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B), 8626341825Sdim _mm512_setzero_si512()); 8627327952Sdim} 8628327952Sdim 8629341825Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS512 8630327952Sdim_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 8631327952Sdim{ 8632327952Sdim return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B), 8633341825Sdim _mm512_setzero_si512()); 8634327952Sdim} 8635327952Sdim 8636341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 8637309124Sdim_mm512_movehdup_ps (__m512 __A) 8638309124Sdim{ 8639309124Sdim return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 8640309124Sdim 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); 8641309124Sdim} 8642309124Sdim 8643341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 8644309124Sdim_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) 8645309124Sdim{ 8646309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 8647309124Sdim (__v16sf)_mm512_movehdup_ps(__A), 8648309124Sdim (__v16sf)__W); 8649309124Sdim} 8650309124Sdim 8651341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 8652309124Sdim_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) 8653309124Sdim{ 8654309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 8655309124Sdim (__v16sf)_mm512_movehdup_ps(__A), 8656309124Sdim (__v16sf)_mm512_setzero_ps()); 8657309124Sdim} 8658309124Sdim 8659341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 8660309124Sdim_mm512_moveldup_ps (__m512 __A) 8661309124Sdim{ 8662309124Sdim return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 8663309124Sdim 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); 8664309124Sdim} 8665309124Sdim 
8666341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 8667309124Sdim_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) 8668309124Sdim{ 8669309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 8670309124Sdim (__v16sf)_mm512_moveldup_ps(__A), 8671309124Sdim (__v16sf)__W); 8672309124Sdim} 8673309124Sdim 8674341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 8675309124Sdim_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) 8676309124Sdim{ 8677309124Sdim return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 8678309124Sdim (__v16sf)_mm512_moveldup_ps(__A), 8679309124Sdim (__v16sf)_mm512_setzero_ps()); 8680309124Sdim} 8681309124Sdim 8682341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 8683314564Sdim_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8684314564Sdim{ 8685341825Sdim return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W); 8686314564Sdim} 8687314564Sdim 8688341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 8689314564Sdim_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) 8690314564Sdim{ 8691341825Sdim return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), 8692341825Sdim _mm_setzero_ps()); 8693314564Sdim} 8694314564Sdim 8695341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 8696314564Sdim_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8697314564Sdim{ 8698341825Sdim return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W); 8699314564Sdim} 8700314564Sdim 8701341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 8702314564Sdim_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) 8703314564Sdim{ 8704341825Sdim return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), 8705341825Sdim _mm_setzero_pd()); 8706314564Sdim} 8707314564Sdim 8708341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS128 8709314564Sdim_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) 8710314564Sdim{ 8711341825Sdim 
__builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1); 8712314564Sdim} 8713314564Sdim 8714341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS128 8715314564Sdim_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A) 8716314564Sdim{ 8717341825Sdim __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1); 8718314564Sdim} 8719314564Sdim 8720341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 8721314564Sdim_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A) 8722314564Sdim{ 8723314564Sdim __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W, 8724341825Sdim (__v4sf)_mm_setzero_ps(), 8725314564Sdim 0, 4, 4, 4); 8726314564Sdim 8727360784Sdim return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1); 8728314564Sdim} 8729314564Sdim 8730341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 8731314564Sdim_mm_maskz_load_ss (__mmask8 __U, const float* __A) 8732314564Sdim{ 8733360784Sdim return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A, 8734341825Sdim (__v4sf) _mm_setzero_ps(), 8735341825Sdim __U & 1); 8736314564Sdim} 8737314564Sdim 8738341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 8739314564Sdim_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A) 8740314564Sdim{ 8741314564Sdim __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W, 8742341825Sdim (__v2df)_mm_setzero_pd(), 8743341825Sdim 0, 2); 8744314564Sdim 8745360784Sdim return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1); 8746314564Sdim} 8747314564Sdim 8748341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 8749314564Sdim_mm_maskz_load_sd (__mmask8 __U, const double* __A) 8750314564Sdim{ 8751360784Sdim return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, 8752341825Sdim (__v2df) _mm_setzero_pd(), 8753341825Sdim __U & 1); 8754314564Sdim} 8755314564Sdim 8756341825Sdim#define _mm512_shuffle_epi32(A, I) \ 8757341825Sdim 
(__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)) 8758309124Sdim 8759341825Sdim#define _mm512_mask_shuffle_epi32(W, U, A, I) \ 8760309124Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 8761309124Sdim (__v16si)_mm512_shuffle_epi32((A), (I)), \ 8762341825Sdim (__v16si)(__m512i)(W)) 8763309124Sdim 8764341825Sdim#define _mm512_maskz_shuffle_epi32(U, A, I) \ 8765309124Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 8766309124Sdim (__v16si)_mm512_shuffle_epi32((A), (I)), \ 8767341825Sdim (__v16si)_mm512_setzero_si512()) 8768309124Sdim 8769341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 8770309124Sdim_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) 8771309124Sdim{ 8772309124Sdim return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, 8773309124Sdim (__v8df) __W, 8774309124Sdim (__mmask8) __U); 8775309124Sdim} 8776309124Sdim 8777341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 8778309124Sdim_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) 8779309124Sdim{ 8780309124Sdim return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, 8781309124Sdim (__v8df) _mm512_setzero_pd (), 8782309124Sdim (__mmask8) __U); 8783309124Sdim} 8784309124Sdim 8785341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8786309124Sdim_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 8787309124Sdim{ 8788309124Sdim return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, 8789309124Sdim (__v8di) __W, 8790309124Sdim (__mmask8) __U); 8791309124Sdim} 8792309124Sdim 8793341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8794309124Sdim_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A) 8795309124Sdim{ 8796309124Sdim return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, 8797341825Sdim (__v8di) _mm512_setzero_si512 (), 8798309124Sdim (__mmask8) __U); 8799309124Sdim} 8800309124Sdim 8801341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 
8802309124Sdim_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P) 8803309124Sdim{ 8804309124Sdim return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, 8805309124Sdim (__v8df) __W, 8806309124Sdim (__mmask8) __U); 8807309124Sdim} 8808309124Sdim 8809341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 8810309124Sdim_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P) 8811309124Sdim{ 8812309124Sdim return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, 8813309124Sdim (__v8df) _mm512_setzero_pd(), 8814309124Sdim (__mmask8) __U); 8815309124Sdim} 8816309124Sdim 8817341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8818309124Sdim_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P) 8819309124Sdim{ 8820309124Sdim return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, 8821309124Sdim (__v8di) __W, 8822309124Sdim (__mmask8) __U); 8823309124Sdim} 8824309124Sdim 8825341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8826309124Sdim_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) 8827309124Sdim{ 8828309124Sdim return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, 8829341825Sdim (__v8di) _mm512_setzero_si512(), 8830309124Sdim (__mmask8) __U); 8831309124Sdim} 8832309124Sdim 8833341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 8834309124Sdim_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P) 8835309124Sdim{ 8836309124Sdim return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, 8837309124Sdim (__v16sf) __W, 8838309124Sdim (__mmask16) __U); 8839309124Sdim} 8840309124Sdim 8841341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 8842309124Sdim_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P) 8843309124Sdim{ 8844309124Sdim return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, 8845309124Sdim (__v16sf) _mm512_setzero_ps(), 8846309124Sdim (__mmask16) 
__U); 8847309124Sdim} 8848309124Sdim 8849341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8850309124Sdim_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P) 8851309124Sdim{ 8852309124Sdim return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, 8853309124Sdim (__v16si) __W, 8854309124Sdim (__mmask16) __U); 8855309124Sdim} 8856309124Sdim 8857341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8858309124Sdim_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) 8859309124Sdim{ 8860309124Sdim return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, 8861341825Sdim (__v16si) _mm512_setzero_si512(), 8862309124Sdim (__mmask16) __U); 8863309124Sdim} 8864309124Sdim 8865341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 8866309124Sdim_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) 8867309124Sdim{ 8868309124Sdim return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, 8869309124Sdim (__v16sf) __W, 8870309124Sdim (__mmask16) __U); 8871309124Sdim} 8872309124Sdim 8873341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 8874309124Sdim_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) 8875309124Sdim{ 8876309124Sdim return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, 8877309124Sdim (__v16sf) _mm512_setzero_ps(), 8878309124Sdim (__mmask16) __U); 8879309124Sdim} 8880309124Sdim 8881341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8882309124Sdim_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 8883309124Sdim{ 8884309124Sdim return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, 8885309124Sdim (__v16si) __W, 8886309124Sdim (__mmask16) __U); 8887309124Sdim} 8888309124Sdim 8889341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 8890309124Sdim_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) 8891309124Sdim{ 8892309124Sdim return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, 8893341825Sdim (__v16si) 
_mm512_setzero_si512(), 8894309124Sdim (__mmask16) __U); 8895309124Sdim} 8896309124Sdim 8897341825Sdim#define _mm512_cvt_roundps_pd(A, R) \ 8898309124Sdim (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 8899309124Sdim (__v8df)_mm512_undefined_pd(), \ 8900341825Sdim (__mmask8)-1, (int)(R)) 8901309124Sdim 8902341825Sdim#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \ 8903309124Sdim (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 8904309124Sdim (__v8df)(__m512d)(W), \ 8905341825Sdim (__mmask8)(U), (int)(R)) 8906309124Sdim 8907341825Sdim#define _mm512_maskz_cvt_roundps_pd(U, A, R) \ 8908309124Sdim (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 8909309124Sdim (__v8df)_mm512_setzero_pd(), \ 8910341825Sdim (__mmask8)(U), (int)(R)) 8911309124Sdim 8912341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 8913309124Sdim_mm512_cvtps_pd (__m256 __A) 8914309124Sdim{ 8915341825Sdim return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df); 8916309124Sdim} 8917309124Sdim 8918341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 8919309124Sdim_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) 8920309124Sdim{ 8921341825Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 8922341825Sdim (__v8df)_mm512_cvtps_pd(__A), 8923341825Sdim (__v8df)__W); 8924309124Sdim} 8925309124Sdim 8926341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 8927309124Sdim_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) 8928309124Sdim{ 8929341825Sdim return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 8930341825Sdim (__v8df)_mm512_cvtps_pd(__A), 8931341825Sdim (__v8df)_mm512_setzero_pd()); 8932309124Sdim} 8933309124Sdim 8934341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 8935314564Sdim_mm512_cvtpslo_pd (__m512 __A) 8936314564Sdim{ 8937341825Sdim return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A)); 8938314564Sdim} 8939314564Sdim 8940341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 
8941314564Sdim_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A) 8942314564Sdim{ 8943341825Sdim return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A)); 8944314564Sdim} 8945314564Sdim 8946341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 8947309124Sdim_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) 8948309124Sdim{ 8949309124Sdim return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, 8950309124Sdim (__v8df) __A, 8951309124Sdim (__v8df) __W); 8952309124Sdim} 8953309124Sdim 8954341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 8955309124Sdim_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) 8956309124Sdim{ 8957309124Sdim return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, 8958309124Sdim (__v8df) __A, 8959309124Sdim (__v8df) _mm512_setzero_pd ()); 8960309124Sdim} 8961309124Sdim 8962341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 8963309124Sdim_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) 8964309124Sdim{ 8965309124Sdim return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, 8966309124Sdim (__v16sf) __A, 8967309124Sdim (__v16sf) __W); 8968309124Sdim} 8969309124Sdim 8970341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 8971309124Sdim_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) 8972309124Sdim{ 8973309124Sdim return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, 8974309124Sdim (__v16sf) __A, 8975309124Sdim (__v16sf) _mm512_setzero_ps ()); 8976309124Sdim} 8977309124Sdim 8978341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 8979309124Sdim_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A) 8980309124Sdim{ 8981309124Sdim __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A, 8982309124Sdim (__mmask8) __U); 8983309124Sdim} 8984309124Sdim 8985341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 8986309124Sdim_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A) 8987309124Sdim{ 8988309124Sdim 
__builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A, 8989309124Sdim (__mmask8) __U); 8990309124Sdim} 8991309124Sdim 8992341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 8993309124Sdim_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A) 8994309124Sdim{ 8995309124Sdim __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A, 8996309124Sdim (__mmask16) __U); 8997309124Sdim} 8998309124Sdim 8999341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512 9000309124Sdim_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) 9001309124Sdim{ 9002309124Sdim __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A, 9003309124Sdim (__mmask16) __U); 9004309124Sdim} 9005309124Sdim 9006341825Sdim#define _mm_cvt_roundsd_ss(A, B, R) \ 9007309124Sdim (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9008309124Sdim (__v2df)(__m128d)(B), \ 9009309124Sdim (__v4sf)_mm_undefined_ps(), \ 9010341825Sdim (__mmask8)-1, (int)(R)) 9011309124Sdim 9012341825Sdim#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \ 9013309124Sdim (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9014309124Sdim (__v2df)(__m128d)(B), \ 9015309124Sdim (__v4sf)(__m128)(W), \ 9016341825Sdim (__mmask8)(U), (int)(R)) 9017309124Sdim 9018341825Sdim#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \ 9019309124Sdim (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9020309124Sdim (__v2df)(__m128d)(B), \ 9021309124Sdim (__v4sf)_mm_setzero_ps(), \ 9022341825Sdim (__mmask8)(U), (int)(R)) 9023309124Sdim 9024341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 9025309124Sdim_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) 9026309124Sdim{ 9027341825Sdim return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, 9028341825Sdim (__v2df)__B, 9029341825Sdim (__v4sf)__W, 9030341825Sdim (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); 9031309124Sdim} 9032309124Sdim 9033341825Sdimstatic __inline__ __m128 
__DEFAULT_FN_ATTRS128 9034309124Sdim_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B) 9035309124Sdim{ 9036341825Sdim return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, 9037341825Sdim (__v2df)__B, 9038322320Sdim (__v4sf)_mm_setzero_ps(), 9039341825Sdim (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); 9040309124Sdim} 9041309124Sdim 9042309124Sdim#define _mm_cvtss_i32 _mm_cvtss_si32 9043314564Sdim#define _mm_cvtsd_i32 _mm_cvtsd_si32 9044314564Sdim#define _mm_cvti32_sd _mm_cvtsi32_sd 9045314564Sdim#define _mm_cvti32_ss _mm_cvtsi32_ss 9046314564Sdim#ifdef __x86_64__ 9047309124Sdim#define _mm_cvtss_i64 _mm_cvtss_si64 9048309124Sdim#define _mm_cvtsd_i64 _mm_cvtsd_si64 9049309124Sdim#define _mm_cvti64_sd _mm_cvtsi64_sd 9050309124Sdim#define _mm_cvti64_ss _mm_cvtsi64_ss 9051314564Sdim#endif 9052309124Sdim 9053314564Sdim#ifdef __x86_64__ 9054341825Sdim#define _mm_cvt_roundi64_sd(A, B, R) \ 9055309124Sdim (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 9056341825Sdim (int)(R)) 9057309124Sdim 9058341825Sdim#define _mm_cvt_roundsi64_sd(A, B, R) \ 9059309124Sdim (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 9060341825Sdim (int)(R)) 9061314564Sdim#endif 9062309124Sdim 9063341825Sdim#define _mm_cvt_roundsi32_ss(A, B, R) \ 9064341825Sdim (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)) 9065309124Sdim 9066341825Sdim#define _mm_cvt_roundi32_ss(A, B, R) \ 9067341825Sdim (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)) 9068309124Sdim 9069314564Sdim#ifdef __x86_64__ 9070341825Sdim#define _mm_cvt_roundsi64_ss(A, B, R) \ 9071309124Sdim (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9072341825Sdim (int)(R)) 9073309124Sdim 9074341825Sdim#define _mm_cvt_roundi64_ss(A, B, R) \ 9075309124Sdim (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9076341825Sdim (int)(R)) 9077314564Sdim#endif 9078309124Sdim 9079341825Sdim#define 
_mm_cvt_roundss_sd(A, B, R) \ 9080309124Sdim (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9081309124Sdim (__v4sf)(__m128)(B), \ 9082309124Sdim (__v2df)_mm_undefined_pd(), \ 9083341825Sdim (__mmask8)-1, (int)(R)) 9084309124Sdim 9085341825Sdim#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \ 9086309124Sdim (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9087309124Sdim (__v4sf)(__m128)(B), \ 9088309124Sdim (__v2df)(__m128d)(W), \ 9089341825Sdim (__mmask8)(U), (int)(R)) 9090309124Sdim 9091341825Sdim#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \ 9092309124Sdim (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9093309124Sdim (__v4sf)(__m128)(B), \ 9094309124Sdim (__v2df)_mm_setzero_pd(), \ 9095341825Sdim (__mmask8)(U), (int)(R)) 9096309124Sdim 9097341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 9098309124Sdim_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B) 9099309124Sdim{ 9100341825Sdim return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A, 9101341825Sdim (__v4sf)__B, 9102341825Sdim (__v2df)__W, 9103341825Sdim (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); 9104309124Sdim} 9105309124Sdim 9106341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 9107309124Sdim_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B) 9108309124Sdim{ 9109341825Sdim return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A, 9110341825Sdim (__v4sf)__B, 9111341825Sdim (__v2df)_mm_setzero_pd(), 9112341825Sdim (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); 9113309124Sdim} 9114309124Sdim 9115341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 9116309124Sdim_mm_cvtu32_sd (__m128d __A, unsigned __B) 9117309124Sdim{ 9118341825Sdim __A[0] = __B; 9119341825Sdim return __A; 9120309124Sdim} 9121309124Sdim 9122314564Sdim#ifdef __x86_64__ 9123341825Sdim#define _mm_cvt_roundu64_sd(A, B, R) \ 9124309124Sdim (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \ 9125341825Sdim (unsigned long long)(B), (int)(R)) 
9126309124Sdim 9127341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 9128309124Sdim_mm_cvtu64_sd (__m128d __A, unsigned long long __B) 9129309124Sdim{ 9130341825Sdim __A[0] = __B; 9131341825Sdim return __A; 9132309124Sdim} 9133314564Sdim#endif 9134309124Sdim 9135341825Sdim#define _mm_cvt_roundu32_ss(A, B, R) \ 9136309124Sdim (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \ 9137341825Sdim (int)(R)) 9138309124Sdim 9139341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 9140309124Sdim_mm_cvtu32_ss (__m128 __A, unsigned __B) 9141309124Sdim{ 9142341825Sdim __A[0] = __B; 9143341825Sdim return __A; 9144309124Sdim} 9145309124Sdim 9146314564Sdim#ifdef __x86_64__ 9147341825Sdim#define _mm_cvt_roundu64_ss(A, B, R) \ 9148309124Sdim (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \ 9149341825Sdim (unsigned long long)(B), (int)(R)) 9150309124Sdim 9151341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 9152309124Sdim_mm_cvtu64_ss (__m128 __A, unsigned long long __B) 9153309124Sdim{ 9154341825Sdim __A[0] = __B; 9155341825Sdim return __A; 9156309124Sdim} 9157314564Sdim#endif 9158309124Sdim 9159341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 9160309124Sdim_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) 9161309124Sdim{ 9162327952Sdim return (__m512i) __builtin_ia32_selectd_512(__M, 9163327952Sdim (__v16si) _mm512_set1_epi32(__A), 9164327952Sdim (__v16si) __O); 9165309124Sdim} 9166309124Sdim 9167341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 9168309124Sdim_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) 9169309124Sdim{ 9170327952Sdim return (__m512i) __builtin_ia32_selectq_512(__M, 9171327952Sdim (__v8di) _mm512_set1_epi64(__A), 9172327952Sdim (__v8di) __O); 9173309124Sdim} 9174309124Sdim 9175341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 9176321369Sdim_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59, 9177321369Sdim char __e58, char __e57, char 
__e56, char __e55, char __e54, char __e53, 9178321369Sdim char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, 9179321369Sdim char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, 9180321369Sdim char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, 9181321369Sdim char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, 9182321369Sdim char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, 9183321369Sdim char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, 9184321369Sdim char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, 9185321369Sdim char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, 9186321369Sdim char __e4, char __e3, char __e2, char __e1, char __e0) { 9187321369Sdim 9188321369Sdim return __extension__ (__m512i)(__v64qi) 9189321369Sdim {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7, 9190321369Sdim __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15, 9191321369Sdim __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23, 9192321369Sdim __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31, 9193321369Sdim __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39, 9194321369Sdim __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47, 9195321369Sdim __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55, 9196321369Sdim __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63}; 9197321369Sdim} 9198321369Sdim 9199341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 9200321369Sdim_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, 9201321369Sdim short __e27, short __e26, short __e25, short __e24, short __e23, 9202321369Sdim short __e22, short __e21, short __e20, short __e19, short __e18, 9203321369Sdim short __e17, short __e16, short __e15, short __e14, short __e13, 9204321369Sdim short __e12, short __e11, short __e10, short __e9, short __e8, 9205321369Sdim short __e7, short __e6, short __e5, short __e4, short 
__e3, 9206321369Sdim short __e2, short __e1, short __e0) { 9207321369Sdim return __extension__ (__m512i)(__v32hi) 9208321369Sdim {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7, 9209321369Sdim __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15, 9210321369Sdim __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23, 9211321369Sdim __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 }; 9212321369Sdim} 9213321369Sdim 9214341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512 9215309124Sdim_mm512_set_epi32 (int __A, int __B, int __C, int __D, 9216309124Sdim int __E, int __F, int __G, int __H, 9217309124Sdim int __I, int __J, int __K, int __L, 9218309124Sdim int __M, int __N, int __O, int __P) 9219309124Sdim{ 9220309124Sdim return __extension__ (__m512i)(__v16si) 9221309124Sdim { __P, __O, __N, __M, __L, __K, __J, __I, 9222309124Sdim __H, __G, __F, __E, __D, __C, __B, __A }; 9223309124Sdim} 9224309124Sdim 9225309124Sdim#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \ 9226309124Sdim e8,e9,e10,e11,e12,e13,e14,e15) \ 9227309124Sdim _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \ 9228309124Sdim (e5),(e4),(e3),(e2),(e1),(e0)) 9229309124Sdim 9230341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 9231309124Sdim_mm512_set_epi64 (long long __A, long long __B, long long __C, 9232309124Sdim long long __D, long long __E, long long __F, 9233309124Sdim long long __G, long long __H) 9234309124Sdim{ 9235309124Sdim return __extension__ (__m512i) (__v8di) 9236309124Sdim { __H, __G, __F, __E, __D, __C, __B, __A }; 9237309124Sdim} 9238309124Sdim 9239309124Sdim#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ 9240309124Sdim _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) 9241309124Sdim 9242341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 9243309124Sdim_mm512_set_pd (double __A, double __B, double __C, double __D, 9244309124Sdim double __E, double __F, double __G, double __H) 9245309124Sdim{ 9246309124Sdim return __extension__ 
(__m512d) 9247309124Sdim { __H, __G, __F, __E, __D, __C, __B, __A }; 9248309124Sdim} 9249309124Sdim 9250309124Sdim#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ 9251309124Sdim _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) 9252309124Sdim 9253341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 9254309124Sdim_mm512_set_ps (float __A, float __B, float __C, float __D, 9255309124Sdim float __E, float __F, float __G, float __H, 9256309124Sdim float __I, float __J, float __K, float __L, 9257309124Sdim float __M, float __N, float __O, float __P) 9258309124Sdim{ 9259309124Sdim return __extension__ (__m512) 9260309124Sdim { __P, __O, __N, __M, __L, __K, __J, __I, 9261309124Sdim __H, __G, __F, __E, __D, __C, __B, __A }; 9262309124Sdim} 9263309124Sdim 9264309124Sdim#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \ 9265309124Sdim _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \ 9266309124Sdim (e4),(e3),(e2),(e1),(e0)) 9267309124Sdim 9268341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 9269314564Sdim_mm512_abs_ps(__m512 __A) 9270309124Sdim{ 9271314564Sdim return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; 9272309124Sdim} 9273309124Sdim 9274341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512 9275314564Sdim_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) 9276309124Sdim{ 9277314564Sdim return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; 9278309124Sdim} 9279309124Sdim 9280341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 9281314564Sdim_mm512_abs_pd(__m512d __A) 9282309124Sdim{ 9283314564Sdim return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ; 9284309124Sdim} 9285309124Sdim 9286341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512 9287314564Sdim_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) 9288309124Sdim{ 9289314564Sdim return 
(__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A); 9290309124Sdim} 9291309124Sdim 9292341825Sdim/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as 9293341825Sdim * outputs. This class of vector operation forms the basis of many scientific 9294341825Sdim * computations. In vector-reduction arithmetic, the evaluation off is 9295341825Sdim * independent of the order of the input elements of V. 9296314564Sdim 9297341825Sdim * Used bisection method. At each step, we partition the vector with previous 9298341825Sdim * step in half, and the operation is performed on its two halves. 9299341825Sdim * This takes log2(n) steps where n is the number of elements in the vector. 9300341825Sdim */ 9301314564Sdim 9302341825Sdim#define _mm512_mask_reduce_operator(op) \ 9303341825Sdim __v4du __t1 = (__v4du)_mm512_extracti64x4_epi64(__W, 0); \ 9304341825Sdim __v4du __t2 = (__v4du)_mm512_extracti64x4_epi64(__W, 1); \ 9305341825Sdim __m256i __t3 = (__m256i)(__t1 op __t2); \ 9306341825Sdim __v2du __t4 = (__v2du)_mm256_extracti128_si256(__t3, 0); \ 9307341825Sdim __v2du __t5 = (__v2du)_mm256_extracti128_si256(__t3, 1); \ 9308341825Sdim __v2du __t6 = __t4 op __t5; \ 9309341825Sdim __v2du __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ 9310341825Sdim __v2du __t8 = __t6 op __t7; \ 9311353358Sdim return __t8[0] 9312314564Sdim 9313341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) { 9314341825Sdim _mm512_mask_reduce_operator(+); 9315314564Sdim} 9316314564Sdim 9317341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) { 9318341825Sdim _mm512_mask_reduce_operator(*); 9319314564Sdim} 9320314564Sdim 9321341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) { 9322341825Sdim _mm512_mask_reduce_operator(&); 9323314564Sdim} 9324314564Sdim 9325341825Sdimstatic __inline__ long long 
__DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) { 9326341825Sdim _mm512_mask_reduce_operator(|); 9327314564Sdim} 9328314564Sdim 9329341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512 9330314564Sdim_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) { 9331341825Sdim __W = _mm512_maskz_mov_epi64(__M, __W); 9332341825Sdim _mm512_mask_reduce_operator(+); 9333314564Sdim} 9334314564Sdim 9335341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512 9336314564Sdim_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) { 9337341825Sdim __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W); 9338341825Sdim _mm512_mask_reduce_operator(*); 9339314564Sdim} 9340314564Sdim 9341341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512 9342314564Sdim_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) { 9343341825Sdim __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __W); 9344341825Sdim _mm512_mask_reduce_operator(&); 9345314564Sdim} 9346314564Sdim 9347341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512 9348314564Sdim_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) { 9349341825Sdim __W = _mm512_maskz_mov_epi64(__M, __W); 9350341825Sdim _mm512_mask_reduce_operator(|); 9351314564Sdim} 9352341825Sdim#undef _mm512_mask_reduce_operator 9353314564Sdim 9354341825Sdim#define _mm512_mask_reduce_operator(op) \ 9355341825Sdim __m256d __t1 = _mm512_extractf64x4_pd(__W, 0); \ 9356341825Sdim __m256d __t2 = _mm512_extractf64x4_pd(__W, 1); \ 9357341825Sdim __m256d __t3 = __t1 op __t2; \ 9358341825Sdim __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \ 9359341825Sdim __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \ 9360341825Sdim __m128d __t6 = __t4 op __t5; \ 9361341825Sdim __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ 9362341825Sdim __m128d __t8 = __t6 op __t7; \ 9363353358Sdim return __t8[0] 9364341825Sdim 9365341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) { 9366341825Sdim 
_mm512_mask_reduce_operator(+); 9367341825Sdim} 9368341825Sdim 9369341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) { 9370341825Sdim _mm512_mask_reduce_operator(*); 9371341825Sdim} 9372341825Sdim 9373341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512 9374314564Sdim_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) { 9375341825Sdim __W = _mm512_maskz_mov_pd(__M, __W); 9376341825Sdim _mm512_mask_reduce_operator(+); 9377314564Sdim} 9378314564Sdim 9379341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512 9380314564Sdim_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) { 9381341825Sdim __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W); 9382341825Sdim _mm512_mask_reduce_operator(*); 9383314564Sdim} 9384341825Sdim#undef _mm512_mask_reduce_operator 9385314564Sdim 9386341825Sdim#define _mm512_mask_reduce_operator(op) \ 9387341825Sdim __v8su __t1 = (__v8su)_mm512_extracti64x4_epi64(__W, 0); \ 9388341825Sdim __v8su __t2 = (__v8su)_mm512_extracti64x4_epi64(__W, 1); \ 9389341825Sdim __m256i __t3 = (__m256i)(__t1 op __t2); \ 9390341825Sdim __v4su __t4 = (__v4su)_mm256_extracti128_si256(__t3, 0); \ 9391341825Sdim __v4su __t5 = (__v4su)_mm256_extracti128_si256(__t3, 1); \ 9392341825Sdim __v4su __t6 = __t4 op __t5; \ 9393341825Sdim __v4su __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \ 9394341825Sdim __v4su __t8 = __t6 op __t7; \ 9395341825Sdim __v4su __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ 9396341825Sdim __v4su __t10 = __t8 op __t9; \ 9397353358Sdim return __t10[0] 9398314564Sdim 9399341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512 9400314564Sdim_mm512_reduce_add_epi32(__m512i __W) { 9401341825Sdim _mm512_mask_reduce_operator(+); 9402314564Sdim} 9403314564Sdim 9404341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512 9405314564Sdim_mm512_reduce_mul_epi32(__m512i __W) { 9406341825Sdim _mm512_mask_reduce_operator(*); 9407314564Sdim} 9408314564Sdim 9409341825Sdimstatic __inline__ int 
__DEFAULT_FN_ATTRS512 9410314564Sdim_mm512_reduce_and_epi32(__m512i __W) { 9411341825Sdim _mm512_mask_reduce_operator(&); 9412314564Sdim} 9413314564Sdim 9414341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512 9415314564Sdim_mm512_reduce_or_epi32(__m512i __W) { 9416341825Sdim _mm512_mask_reduce_operator(|); 9417314564Sdim} 9418314564Sdim 9419341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512 9420314564Sdim_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) { 9421341825Sdim __W = _mm512_maskz_mov_epi32(__M, __W); 9422341825Sdim _mm512_mask_reduce_operator(+); 9423314564Sdim} 9424314564Sdim 9425341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512 9426314564Sdim_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) { 9427341825Sdim __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W); 9428341825Sdim _mm512_mask_reduce_operator(*); 9429314564Sdim} 9430314564Sdim 9431341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512 9432314564Sdim_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) { 9433341825Sdim __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __W); 9434341825Sdim _mm512_mask_reduce_operator(&); 9435314564Sdim} 9436314564Sdim 9437341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512 9438314564Sdim_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) { 9439341825Sdim __W = _mm512_maskz_mov_epi32(__M, __W); 9440341825Sdim _mm512_mask_reduce_operator(|); 9441314564Sdim} 9442341825Sdim#undef _mm512_mask_reduce_operator 9443314564Sdim 9444341825Sdim#define _mm512_mask_reduce_operator(op) \ 9445341825Sdim __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 0); \ 9446341825Sdim __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 1); \ 9447341825Sdim __m256 __t3 = __t1 op __t2; \ 9448341825Sdim __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \ 9449341825Sdim __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \ 9450341825Sdim __m128 __t6 = __t4 op __t5; \ 9451341825Sdim __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 
1); \ 9452341825Sdim __m128 __t8 = __t6 op __t7; \ 9453341825Sdim __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ 9454341825Sdim __m128 __t10 = __t8 op __t9; \ 9455353358Sdim return __t10[0] 9456341825Sdim 9457341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512 9458341825Sdim_mm512_reduce_add_ps(__m512 __W) { 9459341825Sdim _mm512_mask_reduce_operator(+); 9460341825Sdim} 9461341825Sdim 9462341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512 9463341825Sdim_mm512_reduce_mul_ps(__m512 __W) { 9464341825Sdim _mm512_mask_reduce_operator(*); 9465341825Sdim} 9466341825Sdim 9467341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512 9468314564Sdim_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) { 9469341825Sdim __W = _mm512_maskz_mov_ps(__M, __W); 9470341825Sdim _mm512_mask_reduce_operator(+); 9471314564Sdim} 9472314564Sdim 9473341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512 9474314564Sdim_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) { 9475341825Sdim __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W); 9476341825Sdim _mm512_mask_reduce_operator(*); 9477314564Sdim} 9478341825Sdim#undef _mm512_mask_reduce_operator 9479314564Sdim 9480341825Sdim#define _mm512_mask_reduce_operator(op) \ 9481341825Sdim __m512i __t1 = (__m512i)__builtin_shufflevector((__v8di)__V, (__v8di)__V, 4, 5, 6, 7, 0, 1, 2, 3); \ 9482341825Sdim __m512i __t2 = _mm512_##op(__V, __t1); \ 9483341825Sdim __m512i __t3 = (__m512i)__builtin_shufflevector((__v8di)__t2, (__v8di)__t2, 2, 3, 0, 1, 6, 7, 4, 5); \ 9484341825Sdim __m512i __t4 = _mm512_##op(__t2, __t3); \ 9485341825Sdim __m512i __t5 = (__m512i)__builtin_shufflevector((__v8di)__t4, (__v8di)__t4, 1, 0, 3, 2, 5, 4, 7, 6); \ 9486341825Sdim __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \ 9487353358Sdim return __t6[0] 9488314564Sdim 9489341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512 9490314564Sdim_mm512_reduce_max_epi64(__m512i __V) { 9491341825Sdim _mm512_mask_reduce_operator(max_epi64); 
9492314564Sdim} 9493314564Sdim 9494341825Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS512 9495314564Sdim_mm512_reduce_max_epu64(__m512i __V) { 9496341825Sdim _mm512_mask_reduce_operator(max_epu64); 9497314564Sdim} 9498314564Sdim 9499341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512 9500341825Sdim_mm512_reduce_min_epi64(__m512i __V) { 9501341825Sdim _mm512_mask_reduce_operator(min_epi64); 9502314564Sdim} 9503314564Sdim 9504341825Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS512 9505314564Sdim_mm512_reduce_min_epu64(__m512i __V) { 9506341825Sdim _mm512_mask_reduce_operator(min_epu64); 9507314564Sdim} 9508314564Sdim 9509341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512 9510314564Sdim_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) { 9511341825Sdim __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V); 9512341825Sdim _mm512_mask_reduce_operator(max_epi64); 9513314564Sdim} 9514314564Sdim 9515341825Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS512 9516314564Sdim_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) { 9517341825Sdim __V = _mm512_maskz_mov_epi64(__M, __V); 9518341825Sdim _mm512_mask_reduce_operator(max_epu64); 9519314564Sdim} 9520314564Sdim 9521341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512 9522314564Sdim_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) { 9523341825Sdim __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V); 9524341825Sdim _mm512_mask_reduce_operator(min_epi64); 9525314564Sdim} 9526314564Sdim 9527341825Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS512 9528314564Sdim_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) { 9529341825Sdim __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __V); 9530341825Sdim _mm512_mask_reduce_operator(min_epu64); 9531314564Sdim} 9532341825Sdim#undef _mm512_mask_reduce_operator 9533314564Sdim 9534341825Sdim#define _mm512_mask_reduce_operator(op) \ 
9535341825Sdim __m256i __t1 = _mm512_extracti64x4_epi64(__V, 0); \ 9536341825Sdim __m256i __t2 = _mm512_extracti64x4_epi64(__V, 1); \ 9537341825Sdim __m256i __t3 = _mm256_##op(__t1, __t2); \ 9538341825Sdim __m128i __t4 = _mm256_extracti128_si256(__t3, 0); \ 9539341825Sdim __m128i __t5 = _mm256_extracti128_si256(__t3, 1); \ 9540341825Sdim __m128i __t6 = _mm_##op(__t4, __t5); \ 9541341825Sdim __m128i __t7 = (__m128i)__builtin_shufflevector((__v4si)__t6, (__v4si)__t6, 2, 3, 0, 1); \ 9542341825Sdim __m128i __t8 = _mm_##op(__t6, __t7); \ 9543341825Sdim __m128i __t9 = (__m128i)__builtin_shufflevector((__v4si)__t8, (__v4si)__t8, 1, 0, 3, 2); \ 9544341825Sdim __v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \ 9545353358Sdim return __t10[0] 9546341825Sdim 9547341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512 9548341825Sdim_mm512_reduce_max_epi32(__m512i __V) { 9549341825Sdim _mm512_mask_reduce_operator(max_epi32); 9550314564Sdim} 9551314564Sdim 9552341825Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS512 9553341825Sdim_mm512_reduce_max_epu32(__m512i __V) { 9554341825Sdim _mm512_mask_reduce_operator(max_epu32); 9555341825Sdim} 9556314564Sdim 9557341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512 9558341825Sdim_mm512_reduce_min_epi32(__m512i __V) { 9559341825Sdim _mm512_mask_reduce_operator(min_epi32); 9560314564Sdim} 9561314564Sdim 9562341825Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS512 9563341825Sdim_mm512_reduce_min_epu32(__m512i __V) { 9564341825Sdim _mm512_mask_reduce_operator(min_epu32); 9565314564Sdim} 9566314564Sdim 9567341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512 9568341825Sdim_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) { 9569341825Sdim __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V); 9570341825Sdim _mm512_mask_reduce_operator(max_epi32); 9571314564Sdim} 9572314564Sdim 9573341825Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS512 9574341825Sdim_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i 
__V) { 9575341825Sdim __V = _mm512_maskz_mov_epi32(__M, __V); 9576341825Sdim _mm512_mask_reduce_operator(max_epu32); 9577314564Sdim} 9578314564Sdim 9579341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512 9580341825Sdim_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) { 9581341825Sdim __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V); 9582341825Sdim _mm512_mask_reduce_operator(min_epi32); 9583314564Sdim} 9584314564Sdim 9585341825Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS512 9586341825Sdim_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) { 9587341825Sdim __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __V); 9588341825Sdim _mm512_mask_reduce_operator(min_epu32); 9589314564Sdim} 9590341825Sdim#undef _mm512_mask_reduce_operator 9591314564Sdim 9592341825Sdim#define _mm512_mask_reduce_operator(op) \ 9593341825Sdim __m256d __t1 = _mm512_extractf64x4_pd(__V, 0); \ 9594341825Sdim __m256d __t2 = _mm512_extractf64x4_pd(__V, 1); \ 9595341825Sdim __m256d __t3 = _mm256_##op(__t1, __t2); \ 9596341825Sdim __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \ 9597341825Sdim __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \ 9598341825Sdim __m128d __t6 = _mm_##op(__t4, __t5); \ 9599341825Sdim __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ 9600341825Sdim __m128d __t8 = _mm_##op(__t6, __t7); \ 9601353358Sdim return __t8[0] 9602314564Sdim 9603341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512 9604341825Sdim_mm512_reduce_max_pd(__m512d __V) { 9605341825Sdim _mm512_mask_reduce_operator(max_pd); 9606341825Sdim} 9607314564Sdim 9608341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512 9609341825Sdim_mm512_reduce_min_pd(__m512d __V) { 9610341825Sdim _mm512_mask_reduce_operator(min_pd); 9611314564Sdim} 9612314564Sdim 9613341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512 9614341825Sdim_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) { 9615341825Sdim __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, 
__V); 9616341825Sdim _mm512_mask_reduce_operator(max_pd); 9617314564Sdim} 9618314564Sdim 9619341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512 9620341825Sdim_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) { 9621341825Sdim __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V); 9622341825Sdim _mm512_mask_reduce_operator(min_pd); 9623314564Sdim} 9624341825Sdim#undef _mm512_mask_reduce_operator 9625314564Sdim 9626341825Sdim#define _mm512_mask_reduce_operator(op) \ 9627341825Sdim __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 0); \ 9628341825Sdim __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 1); \ 9629341825Sdim __m256 __t3 = _mm256_##op(__t1, __t2); \ 9630341825Sdim __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \ 9631341825Sdim __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \ 9632341825Sdim __m128 __t6 = _mm_##op(__t4, __t5); \ 9633341825Sdim __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \ 9634341825Sdim __m128 __t8 = _mm_##op(__t6, __t7); \ 9635341825Sdim __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ 9636341825Sdim __m128 __t10 = _mm_##op(__t8, __t9); \ 9637353358Sdim return __t10[0] 9638341825Sdim 9639341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512 9640341825Sdim_mm512_reduce_max_ps(__m512 __V) { 9641341825Sdim _mm512_mask_reduce_operator(max_ps); 9642314564Sdim} 9643314564Sdim 9644341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512 9645341825Sdim_mm512_reduce_min_ps(__m512 __V) { 9646341825Sdim _mm512_mask_reduce_operator(min_ps); 9647314564Sdim} 9648314564Sdim 9649341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512 9650341825Sdim_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) { 9651341825Sdim __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V); 9652341825Sdim _mm512_mask_reduce_operator(max_ps); 9653341825Sdim} 9654341825Sdim 9655341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512 9656314564Sdim_mm512_mask_reduce_min_ps(__mmask16 __M, 
__m512 __V) { 9657341825Sdim __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V); 9658341825Sdim _mm512_mask_reduce_operator(min_ps); 9659314564Sdim} 9660341825Sdim#undef _mm512_mask_reduce_operator 9661314564Sdim 9662360784Sdim/// Moves the least significant 32 bits of a vector of [16 x i32] to a 9663360784Sdim/// 32-bit signed integer value. 9664360784Sdim/// 9665360784Sdim/// \headerfile <x86intrin.h> 9666360784Sdim/// 9667360784Sdim/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 9668360784Sdim/// 9669360784Sdim/// \param __A 9670360784Sdim/// A vector of [16 x i32]. The least significant 32 bits are moved to the 9671360784Sdim/// destination. 9672360784Sdim/// \returns A 32-bit signed integer containing the moved value. 9673360784Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512 9674360784Sdim_mm512_cvtsi512_si32(__m512i __A) { 9675360784Sdim __v16si __b = (__v16si)__A; 9676360784Sdim return __b[0]; 9677360784Sdim} 9678360784Sdim 9679341825Sdim#undef __DEFAULT_FN_ATTRS512 9680341825Sdim#undef __DEFAULT_FN_ATTRS128 9681344779Sdim#undef __DEFAULT_FN_ATTRS 9682288943Sdim 9683341825Sdim#endif /* __AVX512FINTRIN_H */ 9684