/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9277325Sdim#ifndef __IMMINTRIN_H
10277325Sdim#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
11277325Sdim#endif
12277325Sdim
13277325Sdim#ifndef __AVX512FINTRIN_H
14277325Sdim#define __AVX512FINTRIN_H
15277325Sdim
/* Internal 64-byte vector types, signed/unsigned pairs grouped by element. */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));

/* Internal 64-byte floating-point vector types. */
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));

/* Public 512-bit vector types, declared with 64-byte alignment. */
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));

/* Same 512-bit vector types, declared with 1-byte alignment. */
typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));

/* Mask types: one bit per lane for 8-lane and 16-lane operations. */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
39277325Sdim
/* Rounding mode macros.  */
#define _MM_FROUND_TO_NEAREST_INT   0x00  /* nearest integer */
#define _MM_FROUND_TO_NEG_INF       0x01  /* toward negative infinity */
#define _MM_FROUND_TO_POS_INF       0x02  /* toward positive infinity */
#define _MM_FROUND_TO_ZERO          0x03  /* toward zero */
#define _MM_FROUND_CUR_DIRECTION    0x04  /* current rounding direction */
46277325Sdim
/* Constants for integer comparison predicates */
typedef enum {
    _MM_CMPINT_EQ     = 0,  /* Equal */
    _MM_CMPINT_LT     = 1,  /* Less than */
    _MM_CMPINT_LE     = 2,  /* Less than or Equal */
    _MM_CMPINT_UNUSED = 3,
    _MM_CMPINT_NE     = 4,  /* Not Equal */
    _MM_CMPINT_NLT    = 5,  /* Not Less than */
    _MM_CMPINT_NLE    = 6   /* Not Less than or Equal */
} _MM_CMPINT_ENUM;

/* Positive-form aliases for the two "not less" predicates. */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
59314564Sdim
/*
 * Permutation selectors. Each of the four letters in a name is a base-4
 * digit (A = 0, B = 1, C = 2, D = 3), most significant first, so the value
 * of _MM_PERM_WXYZ is W*64 + X*16 + Y*4 + Z. Each row below lists the four
 * selectors that share their first three letters.
 */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
149309124Sdim
/* Mantissa normalization intervals. */
typedef enum
{
  _MM_MANT_NORM_1_2     = 0,  /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2    = 1,  /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1    = 2,  /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5 = 3   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
157309124Sdim
/* Mantissa sign-control selectors. */
typedef enum
{
  _MM_MANT_SIGN_src  = 0,  /* sign = sign(SRC)            */
  _MM_MANT_SIGN_zero = 1,  /* sign = 0                    */
  _MM_MANT_SIGN_nan  = 2   /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
164309124Sdim
/*
 * Default attributes for the functions in this file. All three force
 * inlining, suppress debug info and enable the "avx512f" target feature;
 * the 512 and 128 variants additionally state a minimum vector width.
 */
#define __DEFAULT_FN_ATTRS512                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f"),        \
                 __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f"),        \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
169288943Sdim
/* Create vectors with repeated elements */
171277325Sdim
172341825Sdimstatic  __inline __m512i __DEFAULT_FN_ATTRS512
173277325Sdim_mm512_setzero_si512(void)
174277325Sdim{
175341825Sdim  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
176277325Sdim}
177277325Sdim
178309124Sdim#define _mm512_setzero_epi32 _mm512_setzero_si512
179309124Sdim
180341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
181309124Sdim_mm512_undefined_pd(void)
182296417Sdim{
183296417Sdim  return (__m512d)__builtin_ia32_undef512();
184296417Sdim}
185296417Sdim
186341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
187309124Sdim_mm512_undefined(void)
188296417Sdim{
189296417Sdim  return (__m512)__builtin_ia32_undef512();
190296417Sdim}
191296417Sdim
192341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
193309124Sdim_mm512_undefined_ps(void)
194296417Sdim{
195296417Sdim  return (__m512)__builtin_ia32_undef512();
196296417Sdim}
197296417Sdim
198341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
199309124Sdim_mm512_undefined_epi32(void)
200296417Sdim{
201296417Sdim  return (__m512i)__builtin_ia32_undef512();
202296417Sdim}
203296417Sdim
204341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
205309124Sdim_mm512_broadcastd_epi32 (__m128i __A)
206309124Sdim{
207341825Sdim  return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
208309124Sdim                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
209309124Sdim}
210309124Sdim
211341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
212309124Sdim_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
213309124Sdim{
214309124Sdim  return (__m512i)__builtin_ia32_selectd_512(__M,
215309124Sdim                                             (__v16si) _mm512_broadcastd_epi32(__A),
216309124Sdim                                             (__v16si) __O);
217309124Sdim}
218309124Sdim
219341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
220309124Sdim_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
221309124Sdim{
222309124Sdim  return (__m512i)__builtin_ia32_selectd_512(__M,
223309124Sdim                                             (__v16si) _mm512_broadcastd_epi32(__A),
224309124Sdim                                             (__v16si) _mm512_setzero_si512());
225309124Sdim}
226309124Sdim
227341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
228309124Sdim_mm512_broadcastq_epi64 (__m128i __A)
229309124Sdim{
230341825Sdim  return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
231309124Sdim                                          0, 0, 0, 0, 0, 0, 0, 0);
232309124Sdim}
233309124Sdim
234341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
235309124Sdim_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
236309124Sdim{
237309124Sdim  return (__m512i)__builtin_ia32_selectq_512(__M,
238309124Sdim                                             (__v8di) _mm512_broadcastq_epi64(__A),
239309124Sdim                                             (__v8di) __O);
240309124Sdim
241309124Sdim}
242309124Sdim
243341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
244309124Sdim_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
245309124Sdim{
246309124Sdim  return (__m512i)__builtin_ia32_selectq_512(__M,
247309124Sdim                                             (__v8di) _mm512_broadcastq_epi64(__A),
248309124Sdim                                             (__v8di) _mm512_setzero_si512());
249309124Sdim}
250309124Sdim
251277325Sdim
252341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
253277325Sdim_mm512_setzero_ps(void)
254277325Sdim{
255341825Sdim  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
256341825Sdim                                 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
257277325Sdim}
258309124Sdim
259309124Sdim#define _mm512_setzero _mm512_setzero_ps
260309124Sdim
261341825Sdimstatic  __inline __m512d __DEFAULT_FN_ATTRS512
262277325Sdim_mm512_setzero_pd(void)
263277325Sdim{
264341825Sdim  return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
265277325Sdim}
266277325Sdim
267341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
268277325Sdim_mm512_set1_ps(float __w)
269277325Sdim{
270341825Sdim  return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
271341825Sdim                                 __w, __w, __w, __w, __w, __w, __w, __w  };
272277325Sdim}
273277325Sdim
274341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
275277325Sdim_mm512_set1_pd(double __w)
276277325Sdim{
277341825Sdim  return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
278277325Sdim}
279277325Sdim
280341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
281309124Sdim_mm512_set1_epi8(char __w)
282309124Sdim{
283341825Sdim  return __extension__ (__m512i)(__v64qi){
284341825Sdim    __w, __w, __w, __w, __w, __w, __w, __w,
285341825Sdim    __w, __w, __w, __w, __w, __w, __w, __w,
286341825Sdim    __w, __w, __w, __w, __w, __w, __w, __w,
287341825Sdim    __w, __w, __w, __w, __w, __w, __w, __w,
288341825Sdim    __w, __w, __w, __w, __w, __w, __w, __w,
289341825Sdim    __w, __w, __w, __w, __w, __w, __w, __w,
290341825Sdim    __w, __w, __w, __w, __w, __w, __w, __w,
291341825Sdim    __w, __w, __w, __w, __w, __w, __w, __w  };
292309124Sdim}
293309124Sdim
294341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
295309124Sdim_mm512_set1_epi16(short __w)
296309124Sdim{
297341825Sdim  return __extension__ (__m512i)(__v32hi){
298341825Sdim    __w, __w, __w, __w, __w, __w, __w, __w,
299341825Sdim    __w, __w, __w, __w, __w, __w, __w, __w,
300341825Sdim    __w, __w, __w, __w, __w, __w, __w, __w,
301341825Sdim    __w, __w, __w, __w, __w, __w, __w, __w };
302309124Sdim}
303309124Sdim
304341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
305277325Sdim_mm512_set1_epi32(int __s)
306277325Sdim{
307341825Sdim  return __extension__ (__m512i)(__v16si){
308341825Sdim    __s, __s, __s, __s, __s, __s, __s, __s,
309341825Sdim    __s, __s, __s, __s, __s, __s, __s, __s };
310277325Sdim}
311277325Sdim
312341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
313341825Sdim_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
314327952Sdim{
315341825Sdim  return (__m512i)__builtin_ia32_selectd_512(__M,
316327952Sdim                                             (__v16si)_mm512_set1_epi32(__A),
317327952Sdim                                             (__v16si)_mm512_setzero_si512());
318327952Sdim}
319327952Sdim
320341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
321277325Sdim_mm512_set1_epi64(long long __d)
322277325Sdim{
323341825Sdim  return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
324277325Sdim}
325277325Sdim
326341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
327327952Sdim_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
328327952Sdim{
329327952Sdim  return (__m512i)__builtin_ia32_selectq_512(__M,
330327952Sdim                                             (__v8di)_mm512_set1_epi64(__A),
331327952Sdim                                             (__v8di)_mm512_setzero_si512());
332327952Sdim}
333327952Sdim
334341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
335309124Sdim_mm512_broadcastss_ps(__m128 __A)
336277325Sdim{
337341825Sdim  return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
338309124Sdim                                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
339277325Sdim}
340277325Sdim
341341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
342309124Sdim_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
343309124Sdim{
344341825Sdim  return __extension__ (__m512i)(__v16si)
345309124Sdim   { __D, __C, __B, __A, __D, __C, __B, __A,
346309124Sdim     __D, __C, __B, __A, __D, __C, __B, __A };
347309124Sdim}
348309124Sdim
349341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
350309124Sdim_mm512_set4_epi64 (long long __A, long long __B, long long __C,
351309124Sdim       long long __D)
352309124Sdim{
353341825Sdim  return __extension__ (__m512i) (__v8di)
354309124Sdim   { __D, __C, __B, __A, __D, __C, __B, __A };
355309124Sdim}
356309124Sdim
357341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
358309124Sdim_mm512_set4_pd (double __A, double __B, double __C, double __D)
359309124Sdim{
360341825Sdim  return __extension__ (__m512d)
361309124Sdim   { __D, __C, __B, __A, __D, __C, __B, __A };
362309124Sdim}
363309124Sdim
364341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
365309124Sdim_mm512_set4_ps (float __A, float __B, float __C, float __D)
366309124Sdim{
367341825Sdim  return __extension__ (__m512)
368309124Sdim   { __D, __C, __B, __A, __D, __C, __B, __A,
369309124Sdim     __D, __C, __B, __A, __D, __C, __B, __A };
370309124Sdim}
371309124Sdim
/* Reversed-argument forms of the _mm512_set4_* functions: each forwards
   its arguments in the opposite order. */
#define _mm512_setr4_epi32(e0,e1,e2,e3) _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3) _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3) _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3) _mm512_set4_ps((e3),(e2),(e1),(e0))
383309124Sdim
384341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
385309124Sdim_mm512_broadcastsd_pd(__m128d __A)
386277325Sdim{
387341825Sdim  return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
388309124Sdim                                          0, 0, 0, 0, 0, 0, 0, 0);
389277325Sdim}
390277325Sdim
/* Cast between vector types */
392277325Sdim
393341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
394277325Sdim_mm512_castpd256_pd512(__m256d __a)
395277325Sdim{
396277325Sdim  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
397277325Sdim}
398277325Sdim
399341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
400277325Sdim_mm512_castps256_ps512(__m256 __a)
401277325Sdim{
402277325Sdim  return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
403277325Sdim                                          -1, -1, -1, -1, -1, -1, -1, -1);
404277325Sdim}
405277325Sdim
406341825Sdimstatic __inline __m128d __DEFAULT_FN_ATTRS512
407277325Sdim_mm512_castpd512_pd128(__m512d __a)
408277325Sdim{
409277325Sdim  return __builtin_shufflevector(__a, __a, 0, 1);
410277325Sdim}
411277325Sdim
412341825Sdimstatic __inline __m256d __DEFAULT_FN_ATTRS512
413309124Sdim_mm512_castpd512_pd256 (__m512d __A)
414309124Sdim{
415309124Sdim  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
416309124Sdim}
417309124Sdim
418341825Sdimstatic __inline __m128 __DEFAULT_FN_ATTRS512
419277325Sdim_mm512_castps512_ps128(__m512 __a)
420277325Sdim{
421277325Sdim  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
422277325Sdim}
423277325Sdim
424341825Sdimstatic __inline __m256 __DEFAULT_FN_ATTRS512
425309124Sdim_mm512_castps512_ps256 (__m512 __A)
426309124Sdim{
427309124Sdim  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
428309124Sdim}
429309124Sdim
430341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
431309124Sdim_mm512_castpd_ps (__m512d __A)
432309124Sdim{
433309124Sdim  return (__m512) (__A);
434309124Sdim}
435309124Sdim
436341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
437309124Sdim_mm512_castpd_si512 (__m512d __A)
438309124Sdim{
439309124Sdim  return (__m512i) (__A);
440309124Sdim}
441309124Sdim
442341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
443309124Sdim_mm512_castpd128_pd512 (__m128d __A)
444309124Sdim{
445309124Sdim  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
446309124Sdim}
447309124Sdim
448341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
449309124Sdim_mm512_castps_pd (__m512 __A)
450309124Sdim{
451309124Sdim  return (__m512d) (__A);
452309124Sdim}
453309124Sdim
454341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
455309124Sdim_mm512_castps_si512 (__m512 __A)
456309124Sdim{
457309124Sdim  return (__m512i) (__A);
458309124Sdim}
459309124Sdim
460341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
461309124Sdim_mm512_castps128_ps512 (__m128 __A)
462309124Sdim{
463309124Sdim    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
464309124Sdim}
465309124Sdim
466341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
467309124Sdim_mm512_castsi128_si512 (__m128i __A)
468309124Sdim{
469309124Sdim   return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
470309124Sdim}
471309124Sdim
472341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
473309124Sdim_mm512_castsi256_si512 (__m256i __A)
474309124Sdim{
475309124Sdim   return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
476309124Sdim}
477309124Sdim
478341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
479309124Sdim_mm512_castsi512_ps (__m512i __A)
480309124Sdim{
481309124Sdim  return (__m512) (__A);
482309124Sdim}
483309124Sdim
484341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
485309124Sdim_mm512_castsi512_pd (__m512i __A)
486309124Sdim{
487309124Sdim  return (__m512d) (__A);
488309124Sdim}
489309124Sdim
490341825Sdimstatic __inline __m128i __DEFAULT_FN_ATTRS512
491309124Sdim_mm512_castsi512_si128 (__m512i __A)
492309124Sdim{
493309124Sdim  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
494309124Sdim}
495309124Sdim
496341825Sdimstatic __inline __m256i __DEFAULT_FN_ATTRS512
497309124Sdim_mm512_castsi512_si256 (__m512i __A)
498309124Sdim{
499309124Sdim  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
500309124Sdim}
501309124Sdim
502344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS
503314564Sdim_mm512_int2mask(int __a)
504314564Sdim{
505314564Sdim  return (__mmask16)__a;
506314564Sdim}
507314564Sdim
508344779Sdimstatic __inline__ int __DEFAULT_FN_ATTRS
509314564Sdim_mm512_mask2int(__mmask16 __a)
510314564Sdim{
511314564Sdim  return (int)__a;
512314564Sdim}
513314564Sdim
514341825Sdim/// Constructs a 512-bit floating-point vector of [8 x double] from a
515321369Sdim///    128-bit floating-point vector of [2 x double]. The lower 128 bits
516321369Sdim///    contain the value of the source vector. The upper 384 bits are set
517321369Sdim///    to zero.
518321369Sdim///
519321369Sdim/// \headerfile <x86intrin.h>
520321369Sdim///
521321369Sdim/// This intrinsic has no corresponding instruction.
522321369Sdim///
523321369Sdim/// \param __a
524321369Sdim///    A 128-bit vector of [2 x double].
525321369Sdim/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
526321369Sdim///    contain the value of the parameter. The upper 384 bits are set to zero.
527341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
528321369Sdim_mm512_zextpd128_pd512(__m128d __a)
529321369Sdim{
530321369Sdim  return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
531321369Sdim}
532321369Sdim
533341825Sdim/// Constructs a 512-bit floating-point vector of [8 x double] from a
534321369Sdim///    256-bit floating-point vector of [4 x double]. The lower 256 bits
535321369Sdim///    contain the value of the source vector. The upper 256 bits are set
536321369Sdim///    to zero.
537321369Sdim///
538321369Sdim/// \headerfile <x86intrin.h>
539321369Sdim///
540321369Sdim/// This intrinsic has no corresponding instruction.
541321369Sdim///
542321369Sdim/// \param __a
543321369Sdim///    A 256-bit vector of [4 x double].
544321369Sdim/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
545321369Sdim///    contain the value of the parameter. The upper 256 bits are set to zero.
546341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
547321369Sdim_mm512_zextpd256_pd512(__m256d __a)
548321369Sdim{
549321369Sdim  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
550321369Sdim}
551321369Sdim
552341825Sdim/// Constructs a 512-bit floating-point vector of [16 x float] from a
553321369Sdim///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
554321369Sdim///    the value of the source vector. The upper 384 bits are set to zero.
555321369Sdim///
556321369Sdim/// \headerfile <x86intrin.h>
557321369Sdim///
558321369Sdim/// This intrinsic has no corresponding instruction.
559321369Sdim///
560321369Sdim/// \param __a
561321369Sdim///    A 128-bit vector of [4 x float].
562321369Sdim/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
563321369Sdim///    contain the value of the parameter. The upper 384 bits are set to zero.
564341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
565321369Sdim_mm512_zextps128_ps512(__m128 __a)
566321369Sdim{
567321369Sdim  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
568321369Sdim}
569321369Sdim
570341825Sdim/// Constructs a 512-bit floating-point vector of [16 x float] from a
571321369Sdim///    256-bit floating-point vector of [8 x float]. The lower 256 bits contain
572321369Sdim///    the value of the source vector. The upper 256 bits are set to zero.
573321369Sdim///
574321369Sdim/// \headerfile <x86intrin.h>
575321369Sdim///
576321369Sdim/// This intrinsic has no corresponding instruction.
577321369Sdim///
578321369Sdim/// \param __a
579321369Sdim///    A 256-bit vector of [8 x float].
580321369Sdim/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
581321369Sdim///    contain the value of the parameter. The upper 256 bits are set to zero.
582341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
583321369Sdim_mm512_zextps256_ps512(__m256 __a)
584321369Sdim{
585321369Sdim  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
586321369Sdim}
587321369Sdim
588341825Sdim/// Constructs a 512-bit integer vector from a 128-bit integer vector.
589321369Sdim///    The lower 128 bits contain the value of the source vector. The upper
590321369Sdim///    384 bits are set to zero.
591321369Sdim///
592321369Sdim/// \headerfile <x86intrin.h>
593321369Sdim///
594321369Sdim/// This intrinsic has no corresponding instruction.
595321369Sdim///
596321369Sdim/// \param __a
597321369Sdim///    A 128-bit integer vector.
598321369Sdim/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
599321369Sdim///    the parameter. The upper 384 bits are set to zero.
600341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
601321369Sdim_mm512_zextsi128_si512(__m128i __a)
602321369Sdim{
603321369Sdim  return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
604321369Sdim}
605321369Sdim
606341825Sdim/// Constructs a 512-bit integer vector from a 256-bit integer vector.
607321369Sdim///    The lower 256 bits contain the value of the source vector. The upper
608321369Sdim///    256 bits are set to zero.
609321369Sdim///
610321369Sdim/// \headerfile <x86intrin.h>
611321369Sdim///
612321369Sdim/// This intrinsic has no corresponding instruction.
613321369Sdim///
614321369Sdim/// \param __a
615321369Sdim///    A 256-bit integer vector.
616321369Sdim/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
617321369Sdim///    the parameter. The upper 256 bits are set to zero.
618341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
619321369Sdim_mm512_zextsi256_si512(__m256i __a)
620321369Sdim{
621321369Sdim  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
622321369Sdim}
623321369Sdim
/* Bitwise operators */
625341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
626288943Sdim_mm512_and_epi32(__m512i __a, __m512i __b)
627288943Sdim{
628309124Sdim  return (__m512i)((__v16su)__a & (__v16su)__b);
629288943Sdim}
630288943Sdim
631341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
632288943Sdim_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
633288943Sdim{
634309124Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
635309124Sdim                (__v16si) _mm512_and_epi32(__a, __b),
636309124Sdim                (__v16si) __src);
637288943Sdim}
638309124Sdim
639341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
640288943Sdim_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
641288943Sdim{
642309124Sdim  return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
643309124Sdim                                         __k, __a, __b);
644288943Sdim}
645288943Sdim
646341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
647288943Sdim_mm512_and_epi64(__m512i __a, __m512i __b)
648288943Sdim{
649309124Sdim  return (__m512i)((__v8du)__a & (__v8du)__b);
650288943Sdim}
651288943Sdim
652341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
653288943Sdim_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
654288943Sdim{
655309124Sdim    return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
656309124Sdim                (__v8di) _mm512_and_epi64(__a, __b),
657309124Sdim                (__v8di) __src);
658288943Sdim}
659309124Sdim
660341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
661288943Sdim_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
662288943Sdim{
663309124Sdim  return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
664309124Sdim                                         __k, __a, __b);
665288943Sdim}
666288943Sdim
667341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
668309124Sdim_mm512_andnot_si512 (__m512i __A, __m512i __B)
669309124Sdim{
670341825Sdim  return (__m512i)(~(__v8du)__A & (__v8du)__B);
671309124Sdim}
672309124Sdim
673341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
674288943Sdim_mm512_andnot_epi32 (__m512i __A, __m512i __B)
675288943Sdim{
676341825Sdim  return (__m512i)(~(__v16su)__A & (__v16su)__B);
677288943Sdim}
678288943Sdim
679341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
680309124Sdim_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
681288943Sdim{
682309124Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
683309124Sdim                                         (__v16si)_mm512_andnot_epi32(__A, __B),
684309124Sdim                                         (__v16si)__W);
685288943Sdim}
686288943Sdim
687341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
688309124Sdim_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
689288943Sdim{
690309124Sdim  return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
691309124Sdim                                           __U, __A, __B);
692288943Sdim}
693288943Sdim
694341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
695309124Sdim_mm512_andnot_epi64(__m512i __A, __m512i __B)
696288943Sdim{
697341825Sdim  return (__m512i)(~(__v8du)__A & (__v8du)__B);
698288943Sdim}
699288943Sdim
700341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
701309124Sdim_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
702288943Sdim{
703309124Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
704309124Sdim                                          (__v8di)_mm512_andnot_epi64(__A, __B),
705309124Sdim                                          (__v8di)__W);
706288943Sdim}
707288943Sdim
708341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
709309124Sdim_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
710288943Sdim{
711309124Sdim  return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
712309124Sdim                                           __U, __A, __B);
713288943Sdim}
714309124Sdim
715341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
716288943Sdim_mm512_or_epi32(__m512i __a, __m512i __b)
717288943Sdim{
718309124Sdim  return (__m512i)((__v16su)__a | (__v16su)__b);
719288943Sdim}
720288943Sdim
721341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
722288943Sdim_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
723288943Sdim{
724309124Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
725309124Sdim                                             (__v16si)_mm512_or_epi32(__a, __b),
726309124Sdim                                             (__v16si)__src);
727288943Sdim}
728309124Sdim
729341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
730288943Sdim_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
731288943Sdim{
732309124Sdim  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
733288943Sdim}
734288943Sdim
735341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
736288943Sdim_mm512_or_epi64(__m512i __a, __m512i __b)
737288943Sdim{
738309124Sdim  return (__m512i)((__v8du)__a | (__v8du)__b);
739288943Sdim}
740288943Sdim
741341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
742288943Sdim_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
743288943Sdim{
744309124Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
745309124Sdim                                             (__v8di)_mm512_or_epi64(__a, __b),
746309124Sdim                                             (__v8di)__src);
747288943Sdim}
748309124Sdim
749341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
750288943Sdim_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
751288943Sdim{
752309124Sdim  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
753288943Sdim}
754288943Sdim
755341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
756288943Sdim_mm512_xor_epi32(__m512i __a, __m512i __b)
757288943Sdim{
758309124Sdim  return (__m512i)((__v16su)__a ^ (__v16su)__b);
759288943Sdim}
760288943Sdim
761341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
762288943Sdim_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
763288943Sdim{
764309124Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
765309124Sdim                                            (__v16si)_mm512_xor_epi32(__a, __b),
766309124Sdim                                            (__v16si)__src);
767288943Sdim}
768309124Sdim
769341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
770288943Sdim_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
771288943Sdim{
772309124Sdim  return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
773288943Sdim}
774288943Sdim
775341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
776288943Sdim_mm512_xor_epi64(__m512i __a, __m512i __b)
777288943Sdim{
778309124Sdim  return (__m512i)((__v8du)__a ^ (__v8du)__b);
779288943Sdim}
780288943Sdim
781341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
782288943Sdim_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
783288943Sdim{
784309124Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
785309124Sdim                                             (__v8di)_mm512_xor_epi64(__a, __b),
786309124Sdim                                             (__v8di)__src);
787288943Sdim}
788309124Sdim
789341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
790288943Sdim_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
791288943Sdim{
792309124Sdim  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
793288943Sdim}
794288943Sdim
795341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
796288943Sdim_mm512_and_si512(__m512i __a, __m512i __b)
797288943Sdim{
798309124Sdim  return (__m512i)((__v8du)__a & (__v8du)__b);
799288943Sdim}
800288943Sdim
801341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
802288943Sdim_mm512_or_si512(__m512i __a, __m512i __b)
803288943Sdim{
804309124Sdim  return (__m512i)((__v8du)__a | (__v8du)__b);
805288943Sdim}
806288943Sdim
807341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
808288943Sdim_mm512_xor_si512(__m512i __a, __m512i __b)
809288943Sdim{
810309124Sdim  return (__m512i)((__v8du)__a ^ (__v8du)__b);
811288943Sdim}
812309124Sdim
/* Arithmetic */
814277325Sdim
815341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
816277325Sdim_mm512_add_pd(__m512d __a, __m512d __b)
817277325Sdim{
818309124Sdim  return (__m512d)((__v8df)__a + (__v8df)__b);
819277325Sdim}
820277325Sdim
821341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
822277325Sdim_mm512_add_ps(__m512 __a, __m512 __b)
823277325Sdim{
824309124Sdim  return (__m512)((__v16sf)__a + (__v16sf)__b);
825277325Sdim}
826277325Sdim
827341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
828277325Sdim_mm512_mul_pd(__m512d __a, __m512d __b)
829277325Sdim{
830309124Sdim  return (__m512d)((__v8df)__a * (__v8df)__b);
831277325Sdim}
832277325Sdim
833341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
834277325Sdim_mm512_mul_ps(__m512 __a, __m512 __b)
835277325Sdim{
836309124Sdim  return (__m512)((__v16sf)__a * (__v16sf)__b);
837277325Sdim}
838277325Sdim
839341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
840277325Sdim_mm512_sub_pd(__m512d __a, __m512d __b)
841277325Sdim{
842309124Sdim  return (__m512d)((__v8df)__a - (__v8df)__b);
843277325Sdim}
844277325Sdim
845341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
846277325Sdim_mm512_sub_ps(__m512 __a, __m512 __b)
847277325Sdim{
848309124Sdim  return (__m512)((__v16sf)__a - (__v16sf)__b);
849277325Sdim}
850277325Sdim
851341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
852288943Sdim_mm512_add_epi64 (__m512i __A, __m512i __B)
853288943Sdim{
854309124Sdim  return (__m512i) ((__v8du) __A + (__v8du) __B);
855288943Sdim}
856288943Sdim
857341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
858314564Sdim_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
859288943Sdim{
860314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
861314564Sdim                                             (__v8di)_mm512_add_epi64(__A, __B),
862314564Sdim                                             (__v8di)__W);
863288943Sdim}
864288943Sdim
865341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
866314564Sdim_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
867288943Sdim{
868314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
869314564Sdim                                             (__v8di)_mm512_add_epi64(__A, __B),
870314564Sdim                                             (__v8di)_mm512_setzero_si512());
871288943Sdim}
872288943Sdim
873341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
874288943Sdim_mm512_sub_epi64 (__m512i __A, __m512i __B)
875288943Sdim{
876309124Sdim  return (__m512i) ((__v8du) __A - (__v8du) __B);
877288943Sdim}
878288943Sdim
879341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
880314564Sdim_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
881288943Sdim{
882314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
883314564Sdim                                             (__v8di)_mm512_sub_epi64(__A, __B),
884314564Sdim                                             (__v8di)__W);
885288943Sdim}
886288943Sdim
887341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
888314564Sdim_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
889288943Sdim{
890314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
891314564Sdim                                             (__v8di)_mm512_sub_epi64(__A, __B),
892314564Sdim                                             (__v8di)_mm512_setzero_si512());
893288943Sdim}
894288943Sdim
895341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
896288943Sdim_mm512_add_epi32 (__m512i __A, __m512i __B)
897288943Sdim{
898309124Sdim  return (__m512i) ((__v16su) __A + (__v16su) __B);
899288943Sdim}
900288943Sdim
901341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
902314564Sdim_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
903288943Sdim{
904314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
905314564Sdim                                             (__v16si)_mm512_add_epi32(__A, __B),
906314564Sdim                                             (__v16si)__W);
907288943Sdim}
908288943Sdim
909341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
910288943Sdim_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
911288943Sdim{
912314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
913314564Sdim                                             (__v16si)_mm512_add_epi32(__A, __B),
914314564Sdim                                             (__v16si)_mm512_setzero_si512());
915288943Sdim}
916288943Sdim
917341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
918288943Sdim_mm512_sub_epi32 (__m512i __A, __m512i __B)
919288943Sdim{
920309124Sdim  return (__m512i) ((__v16su) __A - (__v16su) __B);
921288943Sdim}
922288943Sdim
923341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
924314564Sdim_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
925288943Sdim{
926314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
927314564Sdim                                             (__v16si)_mm512_sub_epi32(__A, __B),
928314564Sdim                                             (__v16si)__W);
929288943Sdim}
930288943Sdim
931341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
932314564Sdim_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
933288943Sdim{
934314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
935314564Sdim                                             (__v16si)_mm512_sub_epi32(__A, __B),
936314564Sdim                                             (__v16si)_mm512_setzero_si512());
937288943Sdim}
938288943Sdim
/// Packed double-precision maximum of \a A and \a B via
/// __builtin_ia32_maxpd512; \a R is forwarded as the builtin's trailing int
/// (rounding/SAE control) operand. The expansion is fully parenthesized so
/// that postfix operators apply to the whole cast result.
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))
942309124Sdim
/// Merge-masked _mm512_max_round_pd: __builtin_ia32_selectpd_512 picks each
/// lane from the maximum of \a A and \a B or from the fallback \a W under
/// mask \a U. The expansion is fully parenthesized.
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                   (__v8df)(W)))
947309124Sdim
/// Zero-masked _mm512_max_round_pd: __builtin_ia32_selectpd_512 picks each
/// lane from the maximum of \a A and \a B or from _mm512_setzero_pd() under
/// mask \a U. The expansion is fully parenthesized.
#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
952309124Sdim
953341825Sdimstatic  __inline__ __m512d __DEFAULT_FN_ATTRS512
954277325Sdim_mm512_max_pd(__m512d __A, __m512d __B)
955277325Sdim{
956341825Sdim  return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
957341825Sdim                                           _MM_FROUND_CUR_DIRECTION);
958277325Sdim}
959277325Sdim
960341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
961309124Sdim_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
962309124Sdim{
963341825Sdim  return (__m512d)__builtin_ia32_selectpd_512(__U,
964341825Sdim                                              (__v8df)_mm512_max_pd(__A, __B),
965341825Sdim                                              (__v8df)__W);
966309124Sdim}
967309124Sdim
968341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
969309124Sdim_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
970309124Sdim{
971341825Sdim  return (__m512d)__builtin_ia32_selectpd_512(__U,
972341825Sdim                                              (__v8df)_mm512_max_pd(__A, __B),
973341825Sdim                                              (__v8df)_mm512_setzero_pd());
974309124Sdim}
975309124Sdim
/// Packed single-precision maximum of \a A and \a B via
/// __builtin_ia32_maxps512; \a R is forwarded as the builtin's trailing int
/// (rounding/SAE control) operand. The expansion is fully parenthesized so
/// that postfix operators apply to the whole cast result.
#define _mm512_max_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))
979309124Sdim
/// Merge-masked _mm512_max_round_ps: __builtin_ia32_selectps_512 picks each
/// lane from the maximum of \a A and \a B or from the fallback \a W under
/// mask \a U. The expansion is fully parenthesized.
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                  (__v16sf)(W)))
984309124Sdim
/// Zero-masked _mm512_max_round_ps: __builtin_ia32_selectps_512 picks each
/// lane from the maximum of \a A and \a B or from _mm512_setzero_ps() under
/// mask \a U. The expansion is fully parenthesized.
#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
989309124Sdim
990341825Sdimstatic  __inline__ __m512 __DEFAULT_FN_ATTRS512
991277325Sdim_mm512_max_ps(__m512 __A, __m512 __B)
992277325Sdim{
993341825Sdim  return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
994341825Sdim                                          _MM_FROUND_CUR_DIRECTION);
995277325Sdim}
996277325Sdim
997341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
998309124Sdim_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
999309124Sdim{
1000341825Sdim  return (__m512)__builtin_ia32_selectps_512(__U,
1001341825Sdim                                             (__v16sf)_mm512_max_ps(__A, __B),
1002341825Sdim                                             (__v16sf)__W);
1003309124Sdim}
1004309124Sdim
1005341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1006309124Sdim_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1007309124Sdim{
1008341825Sdim  return (__m512)__builtin_ia32_selectps_512(__U,
1009341825Sdim                                             (__v16sf)_mm512_max_ps(__A, __B),
1010341825Sdim                                             (__v16sf)_mm512_setzero_ps());
1011309124Sdim}
1012309124Sdim
1013341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1014296417Sdim_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1015309124Sdim  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1016296417Sdim                (__v4sf) __B,
1017296417Sdim                (__v4sf) __W,
1018296417Sdim                (__mmask8) __U,
1019296417Sdim                _MM_FROUND_CUR_DIRECTION);
1020296417Sdim}
1021296417Sdim
1022341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1023296417Sdim_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1024309124Sdim  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1025296417Sdim                (__v4sf) __B,
1026296417Sdim                (__v4sf)  _mm_setzero_ps (),
1027296417Sdim                (__mmask8) __U,
1028296417Sdim                _MM_FROUND_CUR_DIRECTION);
1029296417Sdim}
1030296417Sdim
/// Unmasked scalar single-precision maximum of \a A and \a B: calls
/// __builtin_ia32_maxss_round_mask with an all-ones mask ((__mmask8)-1), a
/// zero pass-through vector and \a R as the int control operand. The
/// expansion is fully parenthesized.
#define _mm_max_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
1036296417Sdim
/// Masked scalar single-precision maximum of \a A and \a B: \a W is the
/// pass-through source, \a U the write mask and \a R the int control
/// operand handed to __builtin_ia32_maxss_round_mask. The expansion is
/// fully parenthesized.
#define _mm_mask_max_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))
1042296417Sdim
/// Zero-masked scalar single-precision maximum of \a A and \a B: the
/// pass-through source is _mm_setzero_ps(), \a U is the write mask and \a R
/// the int control operand handed to __builtin_ia32_maxss_round_mask. The
/// expansion is fully parenthesized.
#define _mm_maskz_max_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1048296417Sdim
1049341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1050296417Sdim_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1051309124Sdim  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1052296417Sdim                (__v2df) __B,
1053296417Sdim                (__v2df) __W,
1054296417Sdim                (__mmask8) __U,
1055296417Sdim                _MM_FROUND_CUR_DIRECTION);
1056296417Sdim}
1057296417Sdim
1058341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1059296417Sdim_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1060309124Sdim  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1061296417Sdim                (__v2df) __B,
1062296417Sdim                (__v2df)  _mm_setzero_pd (),
1063296417Sdim                (__mmask8) __U,
1064296417Sdim                _MM_FROUND_CUR_DIRECTION);
1065296417Sdim}
1066296417Sdim
/// Unmasked scalar double-precision maximum of \a A and \a B: calls
/// __builtin_ia32_maxsd_round_mask with an all-ones mask ((__mmask8)-1), a
/// zero pass-through vector and \a R as the int control operand. The
/// expansion is fully parenthesized.
#define _mm_max_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
1072296417Sdim
/// Masked scalar double-precision maximum of \a A and \a B: \a W is the
/// pass-through source, \a U the write mask and \a R the int control
/// operand handed to __builtin_ia32_maxsd_round_mask. The expansion is
/// fully parenthesized.
#define _mm_mask_max_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
1078296417Sdim
/// Zero-masked scalar double-precision maximum of \a A and \a B: the
/// pass-through source is _mm_setzero_pd(), \a U is the write mask and \a R
/// the int control operand handed to __builtin_ia32_maxsd_round_mask. The
/// expansion is fully parenthesized.
#define _mm_maskz_max_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1084296417Sdim
1085277325Sdimstatic __inline __m512i
1086341825Sdim__DEFAULT_FN_ATTRS512
1087277325Sdim_mm512_max_epi32(__m512i __A, __m512i __B)
1088277325Sdim{
1089341825Sdim  return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
1090277325Sdim}
1091277325Sdim
1092341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1093309124Sdim_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1094309124Sdim{
1095341825Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1096341825Sdim                                            (__v16si)_mm512_max_epi32(__A, __B),
1097341825Sdim                                            (__v16si)__W);
1098309124Sdim}
1099309124Sdim
1100341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1101309124Sdim_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1102309124Sdim{
1103341825Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1104341825Sdim                                            (__v16si)_mm512_max_epi32(__A, __B),
1105341825Sdim                                            (__v16si)_mm512_setzero_si512());
1106309124Sdim}
1107309124Sdim
1108341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1109277325Sdim_mm512_max_epu32(__m512i __A, __m512i __B)
1110277325Sdim{
1111341825Sdim  return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
1112277325Sdim}
1113277325Sdim
1114341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1115309124Sdim_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1116309124Sdim{
1117341825Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1118341825Sdim                                            (__v16si)_mm512_max_epu32(__A, __B),
1119341825Sdim                                            (__v16si)__W);
1120309124Sdim}
1121309124Sdim
1122341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1123309124Sdim_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1124309124Sdim{
1125341825Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1126341825Sdim                                            (__v16si)_mm512_max_epu32(__A, __B),
1127341825Sdim                                            (__v16si)_mm512_setzero_si512());
1128309124Sdim}
1129309124Sdim
1130341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1131277325Sdim_mm512_max_epi64(__m512i __A, __m512i __B)
1132277325Sdim{
1133341825Sdim  return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
1134277325Sdim}
1135277325Sdim
1136341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1137309124Sdim_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1138309124Sdim{
1139341825Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1140341825Sdim                                             (__v8di)_mm512_max_epi64(__A, __B),
1141341825Sdim                                             (__v8di)__W);
1142309124Sdim}
1143309124Sdim
1144341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1145309124Sdim_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1146309124Sdim{
1147341825Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1148341825Sdim                                             (__v8di)_mm512_max_epi64(__A, __B),
1149341825Sdim                                             (__v8di)_mm512_setzero_si512());
1150309124Sdim}
1151309124Sdim
1152341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1153277325Sdim_mm512_max_epu64(__m512i __A, __m512i __B)
1154277325Sdim{
1155341825Sdim  return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
1156277325Sdim}
1157277325Sdim
1158341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1159309124Sdim_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1160309124Sdim{
1161341825Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1162341825Sdim                                             (__v8di)_mm512_max_epu64(__A, __B),
1163341825Sdim                                             (__v8di)__W);
1164309124Sdim}
1165309124Sdim
1166341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1167309124Sdim_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1168309124Sdim{
1169341825Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1170341825Sdim                                             (__v8di)_mm512_max_epu64(__A, __B),
1171341825Sdim                                             (__v8di)_mm512_setzero_si512());
1172309124Sdim}
1173309124Sdim
/// Packed double-precision minimum of \a A and \a B via
/// __builtin_ia32_minpd512; \a R is forwarded as the builtin's trailing int
/// (rounding/SAE control) operand. The expansion is fully parenthesized so
/// that postfix operators apply to the whole cast result.
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))
1177309124Sdim
/// Merge-masked _mm512_min_round_pd: __builtin_ia32_selectpd_512 picks each
/// lane from the minimum of \a A and \a B or from the fallback \a W under
/// mask \a U. The expansion is fully parenthesized.
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                   (__v8df)(W)))
1182309124Sdim
/// Zero-masked _mm512_min_round_pd: __builtin_ia32_selectpd_512 picks each
/// lane from the minimum of \a A and \a B or from _mm512_setzero_pd() under
/// mask \a U. The expansion is fully parenthesized.
#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
1187309124Sdim
1188341825Sdimstatic  __inline__ __m512d __DEFAULT_FN_ATTRS512
1189277325Sdim_mm512_min_pd(__m512d __A, __m512d __B)
1190277325Sdim{
1191341825Sdim  return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1192341825Sdim                                           _MM_FROUND_CUR_DIRECTION);
1193277325Sdim}
1194277325Sdim
1195341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1196309124Sdim_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1197309124Sdim{
1198341825Sdim  return (__m512d)__builtin_ia32_selectpd_512(__U,
1199341825Sdim                                              (__v8df)_mm512_min_pd(__A, __B),
1200341825Sdim                                              (__v8df)__W);
1201309124Sdim}
1202309124Sdim
1203341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1204309124Sdim_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1205309124Sdim{
1206341825Sdim  return (__m512d)__builtin_ia32_selectpd_512(__U,
1207341825Sdim                                              (__v8df)_mm512_min_pd(__A, __B),
1208341825Sdim                                              (__v8df)_mm512_setzero_pd());
1209309124Sdim}
1210309124Sdim
/// Packed single-precision minimum of \a A and \a B via
/// __builtin_ia32_minps512; \a R is forwarded as the builtin's trailing int
/// (rounding/SAE control) operand. The expansion is fully parenthesized so
/// that postfix operators apply to the whole cast result.
#define _mm512_min_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))
1214341825Sdim
/// Merge-masked _mm512_min_round_ps: __builtin_ia32_selectps_512 picks each
/// lane from the minimum of \a A and \a B or from the fallback \a W under
/// mask \a U. The expansion is fully parenthesized.
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                  (__v16sf)(W)))
1219341825Sdim
/// Zero-masked _mm512_min_round_ps: __builtin_ia32_selectps_512 picks each
/// lane from the minimum of \a A and \a B or from _mm512_setzero_ps() under
/// mask \a U. The expansion is fully parenthesized.
#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
1224341825Sdim
1225341825Sdimstatic  __inline__ __m512 __DEFAULT_FN_ATTRS512
1226277325Sdim_mm512_min_ps(__m512 __A, __m512 __B)
1227277325Sdim{
1228341825Sdim  return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1229341825Sdim                                          _MM_FROUND_CUR_DIRECTION);
1230277325Sdim}
1231277325Sdim
1232341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1233309124Sdim_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1234309124Sdim{
1235341825Sdim  return (__m512)__builtin_ia32_selectps_512(__U,
1236341825Sdim                                             (__v16sf)_mm512_min_ps(__A, __B),
1237341825Sdim                                             (__v16sf)__W);
1238309124Sdim}
1239309124Sdim
1240341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1241309124Sdim_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1242309124Sdim{
1243341825Sdim  return (__m512)__builtin_ia32_selectps_512(__U,
1244341825Sdim                                             (__v16sf)_mm512_min_ps(__A, __B),
1245341825Sdim                                             (__v16sf)_mm512_setzero_ps());
1246309124Sdim}
1247309124Sdim
1248341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1249296417Sdim_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1250309124Sdim  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1251296417Sdim                (__v4sf) __B,
1252296417Sdim                (__v4sf) __W,
1253296417Sdim                (__mmask8) __U,
1254296417Sdim                _MM_FROUND_CUR_DIRECTION);
1255296417Sdim}
1256296417Sdim
1257341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1258296417Sdim_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1259309124Sdim  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1260296417Sdim                (__v4sf) __B,
1261296417Sdim                (__v4sf)  _mm_setzero_ps (),
1262296417Sdim                (__mmask8) __U,
1263296417Sdim                _MM_FROUND_CUR_DIRECTION);
1264296417Sdim}
1265296417Sdim
/// Unmasked scalar single-precision minimum of \a A and \a B: calls
/// __builtin_ia32_minss_round_mask with an all-ones mask ((__mmask8)-1), a
/// zero pass-through vector and \a R as the int control operand. The
/// expansion is fully parenthesized.
#define _mm_min_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
1271296417Sdim
/// Masked scalar single-precision minimum of \a A and \a B: \a W is the
/// pass-through source, \a U the write mask and \a R the int control
/// operand handed to __builtin_ia32_minss_round_mask. The expansion is
/// fully parenthesized.
#define _mm_mask_min_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))
1277296417Sdim
/// Zero-masked scalar single-precision minimum of \a A and \a B: the
/// pass-through source is _mm_setzero_ps(), \a U is the write mask and \a R
/// the int control operand handed to __builtin_ia32_minss_round_mask. The
/// expansion is fully parenthesized.
#define _mm_maskz_min_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1283296417Sdim
1284341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1285296417Sdim_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1286309124Sdim  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1287296417Sdim                (__v2df) __B,
1288296417Sdim                (__v2df) __W,
1289296417Sdim                (__mmask8) __U,
1290296417Sdim                _MM_FROUND_CUR_DIRECTION);
1291296417Sdim}
1292296417Sdim
1293341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1294296417Sdim_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1295309124Sdim  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1296296417Sdim                (__v2df) __B,
1297296417Sdim                (__v2df)  _mm_setzero_pd (),
1298296417Sdim                (__mmask8) __U,
1299296417Sdim                _MM_FROUND_CUR_DIRECTION);
1300296417Sdim}
1301296417Sdim
/* Scalar double-precision minimum of A and B with rounding argument R: calls
 * the minsd builtin with a zero source vector and an all-ones mask.
 * The expansion is fully parenthesized so operators applied to the macro act
 * on the cast result rather than on the builtin call. */
#define _mm_min_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
1307296417Sdim
/* Masked variant of the scalar double-precision minimum with rounding
 * argument R: W is passed as the source operand and U as the mask.
 * The expansion is fully parenthesized so operators applied to the macro act
 * on the cast result rather than on the builtin call. */
#define _mm_mask_min_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
1313296417Sdim
/* Zero-masked variant of the scalar double-precision minimum with rounding
 * argument R: a zero vector is the source operand and U is the mask.
 * The expansion is fully parenthesized so operators applied to the macro act
 * on the cast result rather than on the builtin call. */
#define _mm_maskz_min_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1319296417Sdim
1320277325Sdimstatic __inline __m512i
1321341825Sdim__DEFAULT_FN_ATTRS512
1322277325Sdim_mm512_min_epi32(__m512i __A, __m512i __B)
1323277325Sdim{
1324341825Sdim  return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
1325277325Sdim}
1326277325Sdim
1327341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1328309124Sdim_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1329309124Sdim{
1330341825Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1331341825Sdim                                            (__v16si)_mm512_min_epi32(__A, __B),
1332341825Sdim                                            (__v16si)__W);
1333309124Sdim}
1334309124Sdim
1335341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1336309124Sdim_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1337309124Sdim{
1338341825Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1339341825Sdim                                            (__v16si)_mm512_min_epi32(__A, __B),
1340341825Sdim                                            (__v16si)_mm512_setzero_si512());
1341309124Sdim}
1342309124Sdim
1343341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1344277325Sdim_mm512_min_epu32(__m512i __A, __m512i __B)
1345277325Sdim{
1346341825Sdim  return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
1347277325Sdim}
1348277325Sdim
1349341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1350309124Sdim_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1351309124Sdim{
1352341825Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1353341825Sdim                                            (__v16si)_mm512_min_epu32(__A, __B),
1354341825Sdim                                            (__v16si)__W);
1355309124Sdim}
1356309124Sdim
1357341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1358309124Sdim_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1359309124Sdim{
1360341825Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1361341825Sdim                                            (__v16si)_mm512_min_epu32(__A, __B),
1362341825Sdim                                            (__v16si)_mm512_setzero_si512());
1363309124Sdim}
1364309124Sdim
1365341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1366277325Sdim_mm512_min_epi64(__m512i __A, __m512i __B)
1367277325Sdim{
1368341825Sdim  return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
1369277325Sdim}
1370277325Sdim
1371341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1372309124Sdim_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1373309124Sdim{
1374341825Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1375341825Sdim                                             (__v8di)_mm512_min_epi64(__A, __B),
1376341825Sdim                                             (__v8di)__W);
1377309124Sdim}
1378309124Sdim
1379341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1380309124Sdim_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1381309124Sdim{
1382341825Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1383341825Sdim                                             (__v8di)_mm512_min_epi64(__A, __B),
1384341825Sdim                                             (__v8di)_mm512_setzero_si512());
1385309124Sdim}
1386309124Sdim
1387341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1388277325Sdim_mm512_min_epu64(__m512i __A, __m512i __B)
1389277325Sdim{
1390341825Sdim  return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
1391277325Sdim}
1392277325Sdim
1393341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1394309124Sdim_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1395309124Sdim{
1396341825Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1397341825Sdim                                             (__v8di)_mm512_min_epu64(__A, __B),
1398341825Sdim                                             (__v8di)__W);
1399309124Sdim}
1400309124Sdim
1401341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1402309124Sdim_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1403309124Sdim{
1404341825Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1405341825Sdim                                             (__v8di)_mm512_min_epu64(__A, __B),
1406341825Sdim                                             (__v8di)_mm512_setzero_si512());
1407309124Sdim}
1408309124Sdim
1409341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1410277325Sdim_mm512_mul_epi32(__m512i __X, __m512i __Y)
1411277325Sdim{
1412314564Sdim  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1413277325Sdim}
1414277325Sdim
1415341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1416314564Sdim_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1417288943Sdim{
1418314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1419314564Sdim                                             (__v8di)_mm512_mul_epi32(__X, __Y),
1420314564Sdim                                             (__v8di)__W);
1421288943Sdim}
1422288943Sdim
1423341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1424314564Sdim_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
1425288943Sdim{
1426314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1427314564Sdim                                             (__v8di)_mm512_mul_epi32(__X, __Y),
1428314564Sdim                                             (__v8di)_mm512_setzero_si512 ());
1429288943Sdim}
1430288943Sdim
1431341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1432277325Sdim_mm512_mul_epu32(__m512i __X, __m512i __Y)
1433277325Sdim{
1434314564Sdim  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1435277325Sdim}
1436277325Sdim
1437341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1438314564Sdim_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1439288943Sdim{
1440314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1441314564Sdim                                             (__v8di)_mm512_mul_epu32(__X, __Y),
1442314564Sdim                                             (__v8di)__W);
1443288943Sdim}
1444288943Sdim
1445341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1446314564Sdim_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
1447288943Sdim{
1448314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1449314564Sdim                                             (__v8di)_mm512_mul_epu32(__X, __Y),
1450314564Sdim                                             (__v8di)_mm512_setzero_si512 ());
1451288943Sdim}
1452288943Sdim
1453341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1454288943Sdim_mm512_mullo_epi32 (__m512i __A, __m512i __B)
1455288943Sdim{
1456309124Sdim  return (__m512i) ((__v16su) __A * (__v16su) __B);
1457288943Sdim}
1458288943Sdim
1459341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1460314564Sdim_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
1461288943Sdim{
1462314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1463314564Sdim                                             (__v16si)_mm512_mullo_epi32(__A, __B),
1464314564Sdim                                             (__v16si)_mm512_setzero_si512());
1465288943Sdim}
1466288943Sdim
1467341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1468314564Sdim_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1469288943Sdim{
1470314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1471314564Sdim                                             (__v16si)_mm512_mullo_epi32(__A, __B),
1472314564Sdim                                             (__v16si)__W);
1473288943Sdim}
1474288943Sdim
1475341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1476341825Sdim_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
1477341825Sdim  return (__m512i) ((__v8du) __A * (__v8du) __B);
1478341825Sdim}
1479309124Sdim
1480341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1481341825Sdim_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1482341825Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1483341825Sdim                                             (__v8di)_mm512_mullox_epi64(__A, __B),
1484341825Sdim                                             (__v8di)__W);
1485341825Sdim}
1486309124Sdim
/* Square root of the double-precision vector A with rounding argument R, via
 * the sqrtpd512 builtin.
 * The expansion is fully parenthesized so operators applied to the macro act
 * on the cast result rather than on the builtin call. */
#define _mm512_sqrt_round_pd(A, R) \
  ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))
1489309124Sdim
/* Merge-masked square root with rounding argument R: the selectpd_512 builtin
 * chooses between _mm512_sqrt_round_pd(A, R) and W under control of U.
 * The expansion is fully parenthesized so operators applied to the macro act
 * on the cast result rather than on the builtin call. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                        (__v8df)(__m512d)(W)))
1494341825Sdim
/* Zero-masked square root with rounding argument R: the selectpd_512 builtin
 * chooses between _mm512_sqrt_round_pd(A, R) and a zero vector under
 * control of U.
 * The expansion is fully parenthesized so operators applied to the macro act
 * on the cast result rather than on the builtin call. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                        (__v8df)_mm512_setzero_pd()))
1499341825Sdim
1500341825Sdimstatic  __inline__ __m512d __DEFAULT_FN_ATTRS512
1501341825Sdim_mm512_sqrt_pd(__m512d __A)
1502277325Sdim{
1503341825Sdim  return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1504341825Sdim                                           _MM_FROUND_CUR_DIRECTION);
1505277325Sdim}
1506277325Sdim
1507341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1508309124Sdim_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1509309124Sdim{
1510341825Sdim  return (__m512d)__builtin_ia32_selectpd_512(__U,
1511341825Sdim                                              (__v8df)_mm512_sqrt_pd(__A),
1512341825Sdim                                              (__v8df)__W);
1513309124Sdim}
1514309124Sdim
1515341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1516309124Sdim_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1517309124Sdim{
1518341825Sdim  return (__m512d)__builtin_ia32_selectpd_512(__U,
1519341825Sdim                                              (__v8df)_mm512_sqrt_pd(__A),
1520341825Sdim                                              (__v8df)_mm512_setzero_pd());
1521309124Sdim}
1522309124Sdim
/* Square root of the single-precision vector A with rounding argument R, via
 * the sqrtps512 builtin.
 * The expansion is fully parenthesized so operators applied to the macro act
 * on the cast result rather than on the builtin call. */
#define _mm512_sqrt_round_ps(A, R) \
  ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))
1525309124Sdim
/* Merge-masked square root with rounding argument R: the selectps_512 builtin
 * chooses between _mm512_sqrt_round_ps(A, R) and W under control of U.
 * The expansion is fully parenthesized so operators applied to the macro act
 * on the cast result rather than on the builtin call. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                       (__v16sf)(__m512)(W)))
1530309124Sdim
/* Zero-masked square root with rounding argument R: the selectps_512 builtin
 * chooses between _mm512_sqrt_round_ps(A, R) and a zero vector under
 * control of U.
 * The expansion is fully parenthesized so operators applied to the macro act
 * on the cast result rather than on the builtin call. */
#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
1535309124Sdim
1536341825Sdimstatic  __inline__ __m512 __DEFAULT_FN_ATTRS512
1537341825Sdim_mm512_sqrt_ps(__m512 __A)
1538277325Sdim{
1539341825Sdim  return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1540341825Sdim                                          _MM_FROUND_CUR_DIRECTION);
1541277325Sdim}
1542277325Sdim
1543341825Sdimstatic  __inline__ __m512 __DEFAULT_FN_ATTRS512
1544309124Sdim_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1545309124Sdim{
1546341825Sdim  return (__m512)__builtin_ia32_selectps_512(__U,
1547341825Sdim                                             (__v16sf)_mm512_sqrt_ps(__A),
1548341825Sdim                                             (__v16sf)__W);
1549309124Sdim}
1550309124Sdim
1551341825Sdimstatic  __inline__ __m512 __DEFAULT_FN_ATTRS512
1552309124Sdim_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1553309124Sdim{
1554341825Sdim  return (__m512)__builtin_ia32_selectps_512(__U,
1555341825Sdim                                             (__v16sf)_mm512_sqrt_ps(__A),
1556341825Sdim                                             (__v16sf)_mm512_setzero_ps());
1557309124Sdim}
1558309124Sdim
1559341825Sdimstatic  __inline__ __m512d __DEFAULT_FN_ATTRS512
1560277325Sdim_mm512_rsqrt14_pd(__m512d __A)
1561277325Sdim{
1562277325Sdim  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1563277325Sdim                 (__v8df)
1564277325Sdim                 _mm512_setzero_pd (),
1565277325Sdim                 (__mmask8) -1);}
1566277325Sdim
1567341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1568309124Sdim_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1569309124Sdim{
1570309124Sdim  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1571309124Sdim                  (__v8df) __W,
1572309124Sdim                  (__mmask8) __U);
1573309124Sdim}
1574309124Sdim
1575341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1576309124Sdim_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1577309124Sdim{
1578309124Sdim  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1579309124Sdim                  (__v8df)
1580309124Sdim                  _mm512_setzero_pd (),
1581309124Sdim                  (__mmask8) __U);
1582309124Sdim}
1583309124Sdim
1584341825Sdimstatic  __inline__ __m512 __DEFAULT_FN_ATTRS512
1585277325Sdim_mm512_rsqrt14_ps(__m512 __A)
1586277325Sdim{
1587277325Sdim  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1588277325Sdim                (__v16sf)
1589277325Sdim                _mm512_setzero_ps (),
1590277325Sdim                (__mmask16) -1);
1591277325Sdim}
1592277325Sdim
1593341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1594309124Sdim_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1595309124Sdim{
1596309124Sdim  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1597309124Sdim                 (__v16sf) __W,
1598309124Sdim                 (__mmask16) __U);
1599309124Sdim}
1600309124Sdim
1601341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1602309124Sdim_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1603309124Sdim{
1604309124Sdim  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1605309124Sdim                 (__v16sf)
1606309124Sdim                 _mm512_setzero_ps (),
1607309124Sdim                 (__mmask16) __U);
1608309124Sdim}
1609309124Sdim
1610341825Sdimstatic  __inline__ __m128 __DEFAULT_FN_ATTRS128
1611277325Sdim_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1612277325Sdim{
1613309124Sdim  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1614277325Sdim             (__v4sf) __B,
1615277325Sdim             (__v4sf)
1616277325Sdim             _mm_setzero_ps (),
1617277325Sdim             (__mmask8) -1);
1618277325Sdim}
1619277325Sdim
1620341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1621309124Sdim_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1622309124Sdim{
1623309124Sdim return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1624309124Sdim          (__v4sf) __B,
1625309124Sdim          (__v4sf) __W,
1626309124Sdim          (__mmask8) __U);
1627309124Sdim}
1628309124Sdim
1629341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1630309124Sdim_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1631309124Sdim{
1632309124Sdim return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1633309124Sdim          (__v4sf) __B,
1634309124Sdim          (__v4sf) _mm_setzero_ps (),
1635309124Sdim          (__mmask8) __U);
1636309124Sdim}
1637309124Sdim
1638341825Sdimstatic  __inline__ __m128d __DEFAULT_FN_ATTRS128
1639277325Sdim_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1640277325Sdim{
1641309124Sdim  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1642277325Sdim              (__v2df) __B,
1643277325Sdim              (__v2df)
1644277325Sdim              _mm_setzero_pd (),
1645277325Sdim              (__mmask8) -1);
1646277325Sdim}
1647277325Sdim
1648341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1649309124Sdim_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1650309124Sdim{
1651309124Sdim return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1652309124Sdim          (__v2df) __B,
1653309124Sdim          (__v2df) __W,
1654309124Sdim          (__mmask8) __U);
1655309124Sdim}
1656309124Sdim
1657341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1658309124Sdim_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1659309124Sdim{
1660309124Sdim return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1661309124Sdim          (__v2df) __B,
1662309124Sdim          (__v2df) _mm_setzero_pd (),
1663309124Sdim          (__mmask8) __U);
1664309124Sdim}
1665309124Sdim
1666341825Sdimstatic  __inline__ __m512d __DEFAULT_FN_ATTRS512
1667277325Sdim_mm512_rcp14_pd(__m512d __A)
1668277325Sdim{
1669277325Sdim  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1670277325Sdim               (__v8df)
1671277325Sdim               _mm512_setzero_pd (),
1672277325Sdim               (__mmask8) -1);
1673277325Sdim}
1674277325Sdim
1675341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1676309124Sdim_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1677309124Sdim{
1678309124Sdim  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1679309124Sdim                (__v8df) __W,
1680309124Sdim                (__mmask8) __U);
1681309124Sdim}
1682309124Sdim
1683341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1684309124Sdim_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1685309124Sdim{
1686309124Sdim  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1687309124Sdim                (__v8df)
1688309124Sdim                _mm512_setzero_pd (),
1689309124Sdim                (__mmask8) __U);
1690309124Sdim}
1691309124Sdim
1692341825Sdimstatic  __inline__ __m512 __DEFAULT_FN_ATTRS512
1693277325Sdim_mm512_rcp14_ps(__m512 __A)
1694277325Sdim{
1695277325Sdim  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1696277325Sdim              (__v16sf)
1697277325Sdim              _mm512_setzero_ps (),
1698277325Sdim              (__mmask16) -1);
1699277325Sdim}
1700309124Sdim
1701341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1702309124Sdim_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1703309124Sdim{
1704309124Sdim  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1705309124Sdim                   (__v16sf) __W,
1706309124Sdim                   (__mmask16) __U);
1707309124Sdim}
1708309124Sdim
1709341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1710309124Sdim_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1711309124Sdim{
1712309124Sdim  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1713309124Sdim                   (__v16sf)
1714309124Sdim                   _mm512_setzero_ps (),
1715309124Sdim                   (__mmask16) __U);
1716309124Sdim}
1717309124Sdim
1718341825Sdimstatic  __inline__ __m128 __DEFAULT_FN_ATTRS128
1719277325Sdim_mm_rcp14_ss(__m128 __A, __m128 __B)
1720277325Sdim{
1721309124Sdim  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1722277325Sdim                 (__v4sf) __B,
1723277325Sdim                 (__v4sf)
1724277325Sdim                 _mm_setzero_ps (),
1725277325Sdim                 (__mmask8) -1);
1726277325Sdim}
1727277325Sdim
1728341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1729309124Sdim_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1730309124Sdim{
1731309124Sdim return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1732309124Sdim          (__v4sf) __B,
1733309124Sdim          (__v4sf) __W,
1734309124Sdim          (__mmask8) __U);
1735309124Sdim}
1736309124Sdim
1737341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1738309124Sdim_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1739309124Sdim{
1740309124Sdim return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1741309124Sdim          (__v4sf) __B,
1742309124Sdim          (__v4sf) _mm_setzero_ps (),
1743309124Sdim          (__mmask8) __U);
1744309124Sdim}
1745309124Sdim
1746341825Sdimstatic  __inline__ __m128d __DEFAULT_FN_ATTRS128
1747277325Sdim_mm_rcp14_sd(__m128d __A, __m128d __B)
1748277325Sdim{
1749309124Sdim  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1750277325Sdim            (__v2df) __B,
1751277325Sdim            (__v2df)
1752277325Sdim            _mm_setzero_pd (),
1753277325Sdim            (__mmask8) -1);
1754277325Sdim}
1755277325Sdim
1756341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1757309124Sdim_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1758309124Sdim{
1759309124Sdim return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1760309124Sdim          (__v2df) __B,
1761309124Sdim          (__v2df) __W,
1762309124Sdim          (__mmask8) __U);
1763309124Sdim}
1764309124Sdim
1765341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1766309124Sdim_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1767309124Sdim{
1768309124Sdim return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1769309124Sdim          (__v2df) __B,
1770309124Sdim          (__v2df) _mm_setzero_pd (),
1771309124Sdim          (__mmask8) __U);
1772309124Sdim}
1773309124Sdim
1774341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
1775277325Sdim_mm512_floor_ps(__m512 __A)
1776277325Sdim{
1777277325Sdim  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1778277325Sdim                                                  _MM_FROUND_FLOOR,
1779277325Sdim                                                  (__v16sf) __A, -1,
1780277325Sdim                                                  _MM_FROUND_CUR_DIRECTION);
1781277325Sdim}
1782277325Sdim
1783341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1784309124Sdim_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1785309124Sdim{
1786309124Sdim  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1787309124Sdim                   _MM_FROUND_FLOOR,
1788309124Sdim                   (__v16sf) __W, __U,
1789309124Sdim                   _MM_FROUND_CUR_DIRECTION);
1790309124Sdim}
1791309124Sdim
1792341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
1793277325Sdim_mm512_floor_pd(__m512d __A)
1794277325Sdim{
1795277325Sdim  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1796277325Sdim                                                   _MM_FROUND_FLOOR,
1797277325Sdim                                                   (__v8df) __A, -1,
1798277325Sdim                                                   _MM_FROUND_CUR_DIRECTION);
1799277325Sdim}
1800277325Sdim
1801341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1802309124Sdim_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1803309124Sdim{
1804309124Sdim  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1805309124Sdim                _MM_FROUND_FLOOR,
1806309124Sdim                (__v8df) __W, __U,
1807309124Sdim                _MM_FROUND_CUR_DIRECTION);
1808309124Sdim}
1809309124Sdim
1810341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1811309124Sdim_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1812309124Sdim{
1813309124Sdim  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1814309124Sdim                   _MM_FROUND_CEIL,
1815309124Sdim                   (__v16sf) __W, __U,
1816309124Sdim                   _MM_FROUND_CUR_DIRECTION);
1817309124Sdim}
1818309124Sdim
1819341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
1820277325Sdim_mm512_ceil_ps(__m512 __A)
1821277325Sdim{
1822277325Sdim  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1823277325Sdim                                                  _MM_FROUND_CEIL,
1824277325Sdim                                                  (__v16sf) __A, -1,
1825277325Sdim                                                  _MM_FROUND_CUR_DIRECTION);
1826277325Sdim}
1827277325Sdim
1828341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
1829277325Sdim_mm512_ceil_pd(__m512d __A)
1830277325Sdim{
1831277325Sdim  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1832277325Sdim                                                   _MM_FROUND_CEIL,
1833277325Sdim                                                   (__v8df) __A, -1,
1834277325Sdim                                                   _MM_FROUND_CUR_DIRECTION);
1835277325Sdim}
1836277325Sdim
1837341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1838309124Sdim_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1839309124Sdim{
1840309124Sdim  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1841309124Sdim                _MM_FROUND_CEIL,
1842309124Sdim                (__v8df) __W, __U,
1843309124Sdim                _MM_FROUND_CUR_DIRECTION);
1844309124Sdim}
1845309124Sdim
1846341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1847277325Sdim_mm512_abs_epi64(__m512i __A)
1848277325Sdim{
1849341825Sdim  return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
1850277325Sdim}
1851277325Sdim
1852341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1853309124Sdim_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1854309124Sdim{
1855341825Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1856341825Sdim                                             (__v8di)_mm512_abs_epi64(__A),
1857341825Sdim                                             (__v8di)__W);
1858309124Sdim}
1859309124Sdim
1860341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1861309124Sdim_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1862309124Sdim{
1863341825Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1864341825Sdim                                             (__v8di)_mm512_abs_epi64(__A),
1865341825Sdim                                             (__v8di)_mm512_setzero_si512());
1866309124Sdim}
1867309124Sdim
1868341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1869277325Sdim_mm512_abs_epi32(__m512i __A)
1870277325Sdim{
1871341825Sdim  return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
1872277325Sdim}
1873277325Sdim
1874341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1875309124Sdim_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1876309124Sdim{
1877341825Sdim  return (__m512i)__builtin_ia32_selectd_512(__U,
1878341825Sdim                                             (__v16si)_mm512_abs_epi32(__A),
1879341825Sdim                                             (__v16si)__W);
1880309124Sdim}
1881309124Sdim
1882341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1883309124Sdim_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
1884309124Sdim{
1885341825Sdim  return (__m512i)__builtin_ia32_selectd_512(__U,
1886341825Sdim                                             (__v16si)_mm512_abs_epi32(__A),
1887341825Sdim                                             (__v16si)_mm512_setzero_si512());
1888309124Sdim}
1889309124Sdim
1890341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1891296417Sdim_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1892341825Sdim  __A = _mm_add_ss(__A, __B);
1893341825Sdim  return __builtin_ia32_selectss_128(__U, __A, __W);
1894296417Sdim}
1895296417Sdim
1896341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1897296417Sdim_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1898341825Sdim  __A = _mm_add_ss(__A, __B);
1899341825Sdim  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1900296417Sdim}
1901296417Sdim
/* Scalar single-precision add of A and B with rounding argument R: calls the
 * addss builtin with a zero source vector and an all-ones mask.
 * The expansion is fully parenthesized so operators applied to the macro act
 * on the cast result rather than on the builtin call. */
#define _mm_add_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
1907296417Sdim
/* Masked variant of the scalar single-precision add with rounding argument R:
 * W is passed as the source operand and U as the mask.
 * The expansion is fully parenthesized so operators applied to the macro act
 * on the cast result rather than on the builtin call. */
#define _mm_mask_add_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U), (int)(R)))
1913296417Sdim
/* Zero-masked variant of the scalar single-precision add with rounding
 * argument R: a zero vector is the source operand and U is the mask.
 * The expansion is fully parenthesized so operators applied to the macro act
 * on the cast result rather than on the builtin call. */
#define _mm_maskz_add_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1919296417Sdim
1920341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1921296417Sdim_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1922341825Sdim  __A = _mm_add_sd(__A, __B);
1923341825Sdim  return __builtin_ia32_selectsd_128(__U, __A, __W);
1924296417Sdim}
1925296417Sdim
1926341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1927296417Sdim_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1928341825Sdim  __A = _mm_add_sd(__A, __B);
1929341825Sdim  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1930296417Sdim}
/* Scalar double add through __builtin_ia32_addsd_round_mask.  R is forwarded
 * as the builtin's final int argument (presumably a _MM_FROUND_* control --
 * confirm against the builtin).  Expansions are fully parenthesized so the
 * leading cast applies to the whole call even when the macro is followed by
 * a postfix operator. */

/* Unmasked: zero pass-through vector, all-ones mask. */
#define _mm_add_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked: W is the pass-through vector, U the mask. */
#define _mm_mask_add_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked: zero pass-through vector, U the mask. */
#define _mm_maskz_add_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1948296417Sdim
1949341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1950296417Sdim_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1951314564Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1952314564Sdim                                              (__v8df)_mm512_add_pd(__A, __B),
1953314564Sdim                                              (__v8df)__W);
1954296417Sdim}
1955296417Sdim
1956341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1957296417Sdim_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1958314564Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1959314564Sdim                                              (__v8df)_mm512_add_pd(__A, __B),
1960314564Sdim                                              (__v8df)_mm512_setzero_pd());
1961296417Sdim}
1962296417Sdim
1963341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1964296417Sdim_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1965314564Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1966314564Sdim                                             (__v16sf)_mm512_add_ps(__A, __B),
1967314564Sdim                                             (__v16sf)__W);
1968296417Sdim}
1969296417Sdim
1970341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1971296417Sdim_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1972314564Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1973314564Sdim                                             (__v16sf)_mm512_add_ps(__A, __B),
1974314564Sdim                                             (__v16sf)_mm512_setzero_ps());
1975296417Sdim}
1976296417Sdim
/* 8 x double add via __builtin_ia32_addpd512, with R forwarded as the
 * builtin's int argument (presumably a _MM_FROUND_* control -- confirm).
 * The masked forms wrap the unmasked macro in the selectpd_512 builtin.
 * Each expansion is parenthesized as a whole so that the result cast cannot
 * be split from the call by a following postfix operator. */
#define _mm512_add_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masked: W supplies the alternative operand of the select. */
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

/* Zero-masked: a zero vector supplies the alternative operand. */
#define _mm512_maskz_add_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
1990296417Sdim
/* 16 x float add via __builtin_ia32_addps512, with R forwarded as the
 * builtin's int argument (presumably a _MM_FROUND_* control -- confirm).
 * The masked forms wrap the unmasked macro in the selectps_512 builtin.
 * Each expansion is parenthesized as a whole so that the result cast cannot
 * be split from the call by a following postfix operator. */
#define _mm512_add_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Merge-masked: W supplies the alternative operand of the select. */
#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

/* Zero-masked: a zero vector supplies the alternative operand. */
#define _mm512_maskz_add_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2004296417Sdim
2005341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
2006296417Sdim_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2007341825Sdim  __A = _mm_sub_ss(__A, __B);
2008341825Sdim  return __builtin_ia32_selectss_128(__U, __A, __W);
2009296417Sdim}
2010296417Sdim
2011341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
2012296417Sdim_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2013341825Sdim  __A = _mm_sub_ss(__A, __B);
2014341825Sdim  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2015296417Sdim}
/* Scalar float subtract through __builtin_ia32_subss_round_mask.  R is
 * forwarded as the builtin's final int argument (presumably a _MM_FROUND_*
 * control -- confirm against the builtin).  Expansions are fully
 * parenthesized so the leading cast always covers the whole call. */

/* Unmasked: zero pass-through vector, all-ones mask. */
#define _mm_sub_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked: W is the pass-through vector, U the mask. */
#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U), (int)(R)))

/* Zero-masked: zero pass-through vector, U the mask. */
#define _mm_maskz_sub_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2033296417Sdim
2034341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
2035296417Sdim_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2036341825Sdim  __A = _mm_sub_sd(__A, __B);
2037341825Sdim  return __builtin_ia32_selectsd_128(__U, __A, __W);
2038296417Sdim}
2039296417Sdim
2040341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
2041296417Sdim_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2042341825Sdim  __A = _mm_sub_sd(__A, __B);
2043341825Sdim  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2044296417Sdim}
2045296417Sdim
/* Scalar double subtract through __builtin_ia32_subsd_round_mask.  R is
 * forwarded as the builtin's final int argument (presumably a _MM_FROUND_*
 * control -- confirm against the builtin).  Expansions are fully
 * parenthesized so the leading cast always covers the whole call. */

/* Unmasked: zero pass-through vector, all-ones mask. */
#define _mm_sub_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked: W is the pass-through vector, U the mask. */
#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked: zero pass-through vector, U the mask. */
#define _mm_maskz_sub_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2063296417Sdim
2064341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2065296417Sdim_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2066314564Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2067314564Sdim                                              (__v8df)_mm512_sub_pd(__A, __B),
2068314564Sdim                                              (__v8df)__W);
2069296417Sdim}
2070296417Sdim
2071341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2072296417Sdim_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2073314564Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2074314564Sdim                                              (__v8df)_mm512_sub_pd(__A, __B),
2075314564Sdim                                              (__v8df)_mm512_setzero_pd());
2076296417Sdim}
2077296417Sdim
2078341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2079296417Sdim_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2080314564Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2081314564Sdim                                             (__v16sf)_mm512_sub_ps(__A, __B),
2082314564Sdim                                             (__v16sf)__W);
2083296417Sdim}
2084296417Sdim
2085341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2086296417Sdim_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2087314564Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2088314564Sdim                                             (__v16sf)_mm512_sub_ps(__A, __B),
2089314564Sdim                                             (__v16sf)_mm512_setzero_ps());
2090296417Sdim}
2091296417Sdim
/* 8 x double subtract via __builtin_ia32_subpd512, with R forwarded as the
 * builtin's int argument (presumably a _MM_FROUND_* control -- confirm).
 * The masked forms wrap the unmasked macro in the selectpd_512 builtin.
 * Each expansion is parenthesized as a whole so that the result cast cannot
 * be split from the call by a following postfix operator. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masked: W supplies the alternative operand of the select. */
#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

/* Zero-masked: a zero vector supplies the alternative operand. */
#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
2105296417Sdim
/* 16 x float subtract via __builtin_ia32_subps512, with R forwarded as the
 * builtin's int argument (presumably a _MM_FROUND_* control -- confirm).
 * The masked forms wrap the unmasked macro in the selectps_512 builtin.
 * Each expansion is parenthesized as a whole so that the result cast cannot
 * be split from the call by a following postfix operator. */
#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Merge-masked: W supplies the alternative operand of the select. */
#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

/* Zero-masked: a zero vector supplies the alternative operand. */
#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2119296417Sdim
2120341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
2121296417Sdim_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2122341825Sdim  __A = _mm_mul_ss(__A, __B);
2123341825Sdim  return __builtin_ia32_selectss_128(__U, __A, __W);
2124296417Sdim}
2125296417Sdim
2126341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
2127296417Sdim_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2128341825Sdim  __A = _mm_mul_ss(__A, __B);
2129341825Sdim  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2130296417Sdim}
/* Scalar float multiply through __builtin_ia32_mulss_round_mask.  R is
 * forwarded as the builtin's final int argument (presumably a _MM_FROUND_*
 * control -- confirm against the builtin).  Expansions are fully
 * parenthesized so the leading cast always covers the whole call. */

/* Unmasked: zero pass-through vector, all-ones mask. */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked: W is the pass-through vector, U the mask. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U), (int)(R)))

/* Zero-masked: zero pass-through vector, U the mask. */
#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2148296417Sdim
2149341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
2150296417Sdim_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2151341825Sdim  __A = _mm_mul_sd(__A, __B);
2152341825Sdim  return __builtin_ia32_selectsd_128(__U, __A, __W);
2153296417Sdim}
2154296417Sdim
2155341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
2156296417Sdim_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2157341825Sdim  __A = _mm_mul_sd(__A, __B);
2158341825Sdim  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2159296417Sdim}
2160296417Sdim
/* Scalar double multiply through __builtin_ia32_mulsd_round_mask.  R is
 * forwarded as the builtin's final int argument (presumably a _MM_FROUND_*
 * control -- confirm against the builtin).  Expansions are fully
 * parenthesized so the leading cast always covers the whole call. */

/* Unmasked: zero pass-through vector, all-ones mask. */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked: W is the pass-through vector, U the mask. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked: zero pass-through vector, U the mask. */
#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2178296417Sdim
2179341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2180296417Sdim_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2181314564Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2182314564Sdim                                              (__v8df)_mm512_mul_pd(__A, __B),
2183314564Sdim                                              (__v8df)__W);
2184296417Sdim}
2185296417Sdim
2186341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2187296417Sdim_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2188314564Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2189314564Sdim                                              (__v8df)_mm512_mul_pd(__A, __B),
2190314564Sdim                                              (__v8df)_mm512_setzero_pd());
2191296417Sdim}
2192296417Sdim
2193341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2194296417Sdim_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2195314564Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2196314564Sdim                                             (__v16sf)_mm512_mul_ps(__A, __B),
2197314564Sdim                                             (__v16sf)__W);
2198296417Sdim}
2199296417Sdim
2200341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2201296417Sdim_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2202314564Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2203314564Sdim                                             (__v16sf)_mm512_mul_ps(__A, __B),
2204314564Sdim                                             (__v16sf)_mm512_setzero_ps());
2205296417Sdim}
2206296417Sdim
/* 8 x double multiply via __builtin_ia32_mulpd512, with R forwarded as the
 * builtin's int argument (presumably a _MM_FROUND_* control -- confirm).
 * The masked forms wrap the unmasked macro in the selectpd_512 builtin.
 * Each expansion is parenthesized as a whole so that the result cast cannot
 * be split from the call by a following postfix operator. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masked: W supplies the alternative operand of the select. */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

/* Zero-masked: a zero vector supplies the alternative operand. */
#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
2220296417Sdim
/* 16 x float multiply via __builtin_ia32_mulps512, with R forwarded as the
 * builtin's int argument (presumably a _MM_FROUND_* control -- confirm).
 * The masked forms wrap the unmasked macro in the selectps_512 builtin.
 * Each expansion is parenthesized as a whole so that the result cast cannot
 * be split from the call by a following postfix operator. */
#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Merge-masked: W supplies the alternative operand of the select. */
#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

/* Zero-masked: a zero vector supplies the alternative operand. */
#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2234296417Sdim
2235341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
2236296417Sdim_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2237341825Sdim  __A = _mm_div_ss(__A, __B);
2238341825Sdim  return __builtin_ia32_selectss_128(__U, __A, __W);
2239296417Sdim}
2240296417Sdim
2241341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
2242296417Sdim_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2243341825Sdim  __A = _mm_div_ss(__A, __B);
2244341825Sdim  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2245296417Sdim}
2246296417Sdim
/* Scalar float divide through __builtin_ia32_divss_round_mask.  R is
 * forwarded as the builtin's final int argument (presumably a _MM_FROUND_*
 * control -- confirm against the builtin).  Expansions are fully
 * parenthesized so the leading cast always covers the whole call. */

/* Unmasked: zero pass-through vector, all-ones mask. */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked: W is the pass-through vector, U the mask. */
#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U), (int)(R)))

/* Zero-masked: zero pass-through vector, U the mask. */
#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2264296417Sdim
2265341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
2266296417Sdim_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2267341825Sdim  __A = _mm_div_sd(__A, __B);
2268341825Sdim  return __builtin_ia32_selectsd_128(__U, __A, __W);
2269296417Sdim}
2270296417Sdim
2271341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
2272296417Sdim_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2273341825Sdim  __A = _mm_div_sd(__A, __B);
2274341825Sdim  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2275296417Sdim}
2276296417Sdim
/* Scalar double divide through __builtin_ia32_divsd_round_mask.  R is
 * forwarded as the builtin's final int argument (presumably a _MM_FROUND_*
 * control -- confirm against the builtin).  Expansions are fully
 * parenthesized so the leading cast always covers the whole call. */

/* Unmasked: zero pass-through vector, all-ones mask. */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked: W is the pass-through vector, U the mask. */
#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked: zero pass-through vector, U the mask. */
#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2294296417Sdim
2295341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
2296309124Sdim_mm512_div_pd(__m512d __a, __m512d __b)
2297309124Sdim{
2298309124Sdim  return (__m512d)((__v8df)__a/(__v8df)__b);
2299309124Sdim}
2300309124Sdim
2301341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2302296417Sdim_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2303314564Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2304314564Sdim                                              (__v8df)_mm512_div_pd(__A, __B),
2305314564Sdim                                              (__v8df)__W);
2306296417Sdim}
2307296417Sdim
2308341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2309296417Sdim_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2310314564Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2311314564Sdim                                              (__v8df)_mm512_div_pd(__A, __B),
2312314564Sdim                                              (__v8df)_mm512_setzero_pd());
2313296417Sdim}
2314296417Sdim
2315341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
2316309124Sdim_mm512_div_ps(__m512 __a, __m512 __b)
2317309124Sdim{
2318309124Sdim  return (__m512)((__v16sf)__a/(__v16sf)__b);
2319309124Sdim}
2320309124Sdim
2321341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2322296417Sdim_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2323314564Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2324314564Sdim                                             (__v16sf)_mm512_div_ps(__A, __B),
2325314564Sdim                                             (__v16sf)__W);
2326296417Sdim}
2327296417Sdim
2328341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2329296417Sdim_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2330314564Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2331314564Sdim                                             (__v16sf)_mm512_div_ps(__A, __B),
2332314564Sdim                                             (__v16sf)_mm512_setzero_ps());
2333296417Sdim}
2334296417Sdim
/* 8 x double divide via __builtin_ia32_divpd512, with R forwarded as the
 * builtin's int argument (presumably a _MM_FROUND_* control -- confirm).
 * The masked forms wrap the unmasked macro in the selectpd_512 builtin.
 * Each expansion is parenthesized as a whole so that the result cast cannot
 * be split from the call by a following postfix operator. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masked: W supplies the alternative operand of the select. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

/* Zero-masked: a zero vector supplies the alternative operand. */
#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
2348296417Sdim
/* 16 x float divide via __builtin_ia32_divps512, with R forwarded as the
 * builtin's int argument (presumably a _MM_FROUND_* control -- confirm).
 * The masked forms wrap the unmasked macro in the selectps_512 builtin.
 * Each expansion is parenthesized as a whole so that the result cast cannot
 * be split from the call by a following postfix operator. */
#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Merge-masked: W supplies the alternative operand of the select. */
#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

/* Zero-masked: a zero vector supplies the alternative operand. */
#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2362296417Sdim
/* 16 x float round-scale family, all expanding to
 * __builtin_ia32_rndscaleps_mask(source, imm, pass-through, mask, control).
 * The non-"_round" forms pass _MM_FROUND_CUR_DIRECTION as the control
 * argument; the "_round" forms pass the caller's R.  Note the argument
 * naming in the masked forms: A is the pass-through (or mask, for maskz),
 * B the mask (or source, for maskz).  Every expansion is parenthesized as a
 * whole so the result cast cannot be separated from the call. */

/* Unmasked: undefined pass-through vector, all-ones mask. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

/* Merge-masked: A = pass-through, B = mask, C = source. */
#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), \
                                          (__mmask16)(B), \
                                          _MM_FROUND_CUR_DIRECTION))

/* Zero-masked: A = mask, B = source. */
#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), \
                                          _MM_FROUND_CUR_DIRECTION))

/* Merge-masked with explicit control R. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), \
                                          (__mmask16)(B), (int)(R)))

/* Zero-masked with explicit control R. */
#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), (int)(R)))

/* Unmasked with explicit control R. */
#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)))
2394309124Sdim
/* 8 x double round-scale family, all expanding to
 * __builtin_ia32_rndscalepd_mask(source, imm, pass-through, mask, control).
 * The non-"_round" forms pass _MM_FROUND_CUR_DIRECTION as the control
 * argument; the "_round" forms pass the caller's R.  Note the argument
 * naming in the masked forms: A is the pass-through (or mask, for maskz),
 * B the mask (or source, for maskz).  Every expansion is parenthesized as a
 * whole so the result cast cannot be separated from the call. */

/* Unmasked: undefined pass-through vector, all-ones mask. */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

/* Merge-masked: A = pass-through, B = mask, C = source. */
#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), \
                                           (__mmask8)(B), \
                                           _MM_FROUND_CUR_DIRECTION))

/* Zero-masked: A = mask, B = source. */
#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), \
                                           _MM_FROUND_CUR_DIRECTION))

/* Merge-masked with explicit control R. */
#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), \
                                           (__mmask8)(B), (int)(R)))

/* Zero-masked with explicit control R. */
#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), (int)(R)))

/* Unmasked with explicit control R. */
#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)))
2426309124Sdim
/* 8 x double fused multiply-add family.  Each macro forwards A, B, C, a
 * mask and the control argument R to one of the vfmaddpd512 builtins
 * (_mask, _mask3 or _maskz); how each builtin variant treats masked-off
 * elements is defined by the builtin and not visible here.  Every expansion
 * is parenthesized as a whole so the result cast cannot be separated from
 * the call by a following postfix operator. */

/* Unmasked: _mask builtin with an all-ones mask. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

/* _mask builtin with caller mask U (note U is the second macro argument). */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

/* _mask3 builtin with caller mask U (U is the fourth macro argument). */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

/* _maskz builtin with caller mask U (U is the first macro argument). */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2453288943Sdim
2454288943Sdim
/* fmsub on packed doubles with explicit rounding R: implemented by negating
 * C and calling the vfmaddpd512 "mask" builtin with every mask bit set.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))
2460288943Sdim
2461288943Sdim
/* Masked fmsub on packed doubles with explicit rounding R: negates C and
 * calls the vfmaddpd512 "mask" builtin with U as the __mmask8 lane mask.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
2467288943Sdim
2468288943Sdim
/* fmsub on packed doubles with explicit rounding R: negates C and calls the
 * vfmaddpd512 "maskz" builtin with U as the __mmask8 lane mask.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
2474288943Sdim
2475288943Sdim
/* fnmadd on packed doubles with explicit rounding R: implemented by negating
 * A and calling the vfmaddpd512 "mask" builtin with every mask bit set.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))
2481288943Sdim
2482288943Sdim
/* fnmadd on packed doubles with explicit rounding R: negates A and calls the
 * vfmaddpd512 "mask3" builtin with U as the __mmask8 lane mask.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
2488288943Sdim
2489288943Sdim
/* fnmadd on packed doubles with explicit rounding R: negates A and calls the
 * vfmaddpd512 "maskz" builtin with U as the __mmask8 lane mask.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
2495288943Sdim
2496288943Sdim
/* fnmsub on packed doubles with explicit rounding R: negates both A and C and
 * calls the vfmaddpd512 "mask" builtin with every mask bit set.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))
2502288943Sdim
2503288943Sdim
/* fnmsub on packed doubles with explicit rounding R: negates both A and C and
 * calls the vfmaddpd512 "maskz" builtin with U as the __mmask8 lane mask.
 * The expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
2509288943Sdim
2510288943Sdim
2511341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2512288943Sdim_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2513277325Sdim{
2514288943Sdim  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2515288943Sdim                                                    (__v8df) __B,
2516288943Sdim                                                    (__v8df) __C,
2517288943Sdim                                                    (__mmask8) -1,
2518288943Sdim                                                    _MM_FROUND_CUR_DIRECTION);
2519277325Sdim}
2520288943Sdim
2521341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2522288943Sdim_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2523277325Sdim{
2524288943Sdim  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2525288943Sdim                                                    (__v8df) __B,
2526288943Sdim                                                    (__v8df) __C,
2527288943Sdim                                                    (__mmask8) __U,
2528288943Sdim                                                    _MM_FROUND_CUR_DIRECTION);
2529277325Sdim}
2530277325Sdim
2531341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2532288943Sdim_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2533277325Sdim{
2534288943Sdim  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2535288943Sdim                                                     (__v8df) __B,
2536288943Sdim                                                     (__v8df) __C,
2537288943Sdim                                                     (__mmask8) __U,
2538288943Sdim                                                     _MM_FROUND_CUR_DIRECTION);
2539277325Sdim}
2540277325Sdim
2541341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2542288943Sdim_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2543288943Sdim{
2544288943Sdim  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2545288943Sdim                                                     (__v8df) __B,
2546288943Sdim                                                     (__v8df) __C,
2547288943Sdim                                                     (__mmask8) __U,
2548288943Sdim                                                     _MM_FROUND_CUR_DIRECTION);
2549288943Sdim}
2550288943Sdim
2551341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2552277325Sdim_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2553277325Sdim{
2554288943Sdim  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2555288943Sdim                                                    (__v8df) __B,
2556288943Sdim                                                    -(__v8df) __C,
2557288943Sdim                                                    (__mmask8) -1,
2558288943Sdim                                                    _MM_FROUND_CUR_DIRECTION);
2559277325Sdim}
2560277325Sdim
2561341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2562288943Sdim_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2563288943Sdim{
2564288943Sdim  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2565288943Sdim                                                    (__v8df) __B,
2566288943Sdim                                                    -(__v8df) __C,
2567288943Sdim                                                    (__mmask8) __U,
2568288943Sdim                                                    _MM_FROUND_CUR_DIRECTION);
2569288943Sdim}
2570288943Sdim
2571341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2572288943Sdim_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2573288943Sdim{
2574288943Sdim  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2575288943Sdim                                                     (__v8df) __B,
2576288943Sdim                                                     -(__v8df) __C,
2577288943Sdim                                                     (__mmask8) __U,
2578288943Sdim                                                     _MM_FROUND_CUR_DIRECTION);
2579288943Sdim}
2580288943Sdim
2581341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2582277325Sdim_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2583277325Sdim{
2584341825Sdim  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2585341825Sdim                                                    -(__v8df) __B,
2586288943Sdim                                                    (__v8df) __C,
2587288943Sdim                                                    (__mmask8) -1,
2588288943Sdim                                                    _MM_FROUND_CUR_DIRECTION);
2589277325Sdim}
2590277325Sdim
2591341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2592288943Sdim_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2593288943Sdim{
2594288943Sdim  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2595288943Sdim                                                     (__v8df) __B,
2596288943Sdim                                                     (__v8df) __C,
2597288943Sdim                                                     (__mmask8) __U,
2598288943Sdim                                                     _MM_FROUND_CUR_DIRECTION);
2599288943Sdim}
2600288943Sdim
2601341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2602288943Sdim_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2603288943Sdim{
2604288943Sdim  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2605288943Sdim                                                     (__v8df) __B,
2606288943Sdim                                                     (__v8df) __C,
2607288943Sdim                                                     (__mmask8) __U,
2608288943Sdim                                                     _MM_FROUND_CUR_DIRECTION);
2609288943Sdim}
2610288943Sdim
2611341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2612288943Sdim_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2613288943Sdim{
2614341825Sdim  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2615341825Sdim                                                    -(__v8df) __B,
2616288943Sdim                                                    -(__v8df) __C,
2617288943Sdim                                                    (__mmask8) -1,
2618288943Sdim                                                    _MM_FROUND_CUR_DIRECTION);
2619288943Sdim}
2620288943Sdim
2621341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2622288943Sdim_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2623288943Sdim{
2624288943Sdim  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2625288943Sdim                                                     (__v8df) __B,
2626288943Sdim                                                     -(__v8df) __C,
2627288943Sdim                                                     (__mmask8) __U,
2628288943Sdim                                                     _MM_FROUND_CUR_DIRECTION);
2629288943Sdim}
2630288943Sdim
/* fmadd on packed floats with explicit rounding R: forwards A, B, C to the
 * vfmaddps512 "mask" builtin with every mask bit set.  The expansion is fully
 * parenthesized so operators at the use site apply to the whole result. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))
2636288943Sdim
2637288943Sdim
/* Masked fmadd on packed floats with explicit rounding R: forwards A, B, C
 * to the vfmaddps512 "mask" builtin with U as the __mmask16 lane mask.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2643288943Sdim
2644288943Sdim
/* fmadd on packed floats with explicit rounding R, using the vfmaddps512
 * "mask3" builtin with U as the __mmask16 lane mask.  The expansion is fully
 * parenthesized so it composes safely in expressions. */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2650288943Sdim
2651288943Sdim
/* fmadd on packed floats with explicit rounding R, using the vfmaddps512
 * "maskz" builtin with U as the __mmask16 lane mask.  The expansion is fully
 * parenthesized so it composes safely in expressions. */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2657288943Sdim
2658288943Sdim
/* fmsub on packed floats with explicit rounding R: implemented by negating C
 * and calling the vfmaddps512 "mask" builtin with every mask bit set.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))
2664288943Sdim
2665288943Sdim
/* Masked fmsub on packed floats with explicit rounding R: negates C and
 * calls the vfmaddps512 "mask" builtin with U as the __mmask16 lane mask.
 * The expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2671288943Sdim
2672288943Sdim
/* fmsub on packed floats with explicit rounding R: negates C and calls the
 * vfmaddps512 "maskz" builtin with U as the __mmask16 lane mask.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2678288943Sdim
2679288943Sdim
/* fnmadd on packed floats with explicit rounding R: implemented by negating
 * B and calling the vfmaddps512 "mask" builtin with every mask bit set.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), -(__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))
2685288943Sdim
2686288943Sdim
/* fnmadd on packed floats with explicit rounding R: negates A and calls the
 * vfmaddps512 "mask3" builtin with U as the __mmask16 lane mask.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2692288943Sdim
2693288943Sdim
/* fnmadd on packed floats with explicit rounding R: negates A and calls the
 * vfmaddps512 "maskz" builtin with U as the __mmask16 lane mask.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2699288943Sdim
2700288943Sdim
/* fnmsub on packed floats with explicit rounding R: negates both B and C and
 * calls the vfmaddps512 "mask" builtin with every mask bit set.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), -(__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))
2706288943Sdim
2707288943Sdim
/* fnmsub on packed floats with explicit rounding R: negates both A and C and
 * calls the vfmaddps512 "maskz" builtin with U as the __mmask16 lane mask.
 * The expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2713288943Sdim
2714288943Sdim
2715341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2716277325Sdim_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2717277325Sdim{
2718288943Sdim  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2719288943Sdim                                                   (__v16sf) __B,
2720288943Sdim                                                   (__v16sf) __C,
2721288943Sdim                                                   (__mmask16) -1,
2722288943Sdim                                                   _MM_FROUND_CUR_DIRECTION);
2723277325Sdim}
2724277325Sdim
2725341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2726288943Sdim_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2727288943Sdim{
2728288943Sdim  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2729288943Sdim                                                   (__v16sf) __B,
2730288943Sdim                                                   (__v16sf) __C,
2731288943Sdim                                                   (__mmask16) __U,
2732288943Sdim                                                   _MM_FROUND_CUR_DIRECTION);
2733288943Sdim}
2734288943Sdim
2735341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2736288943Sdim_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2737288943Sdim{
2738288943Sdim  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2739288943Sdim                                                    (__v16sf) __B,
2740288943Sdim                                                    (__v16sf) __C,
2741288943Sdim                                                    (__mmask16) __U,
2742288943Sdim                                                    _MM_FROUND_CUR_DIRECTION);
2743288943Sdim}
2744288943Sdim
2745341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2746288943Sdim_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2747288943Sdim{
2748288943Sdim  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2749288943Sdim                                                    (__v16sf) __B,
2750288943Sdim                                                    (__v16sf) __C,
2751288943Sdim                                                    (__mmask16) __U,
2752288943Sdim                                                    _MM_FROUND_CUR_DIRECTION);
2753288943Sdim}
2754288943Sdim
2755341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2756277325Sdim_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2757277325Sdim{
2758288943Sdim  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2759288943Sdim                                                   (__v16sf) __B,
2760288943Sdim                                                   -(__v16sf) __C,
2761288943Sdim                                                   (__mmask16) -1,
2762288943Sdim                                                   _MM_FROUND_CUR_DIRECTION);
2763277325Sdim}
2764277325Sdim
2765341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2766288943Sdim_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2767288943Sdim{
2768288943Sdim  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2769288943Sdim                                                   (__v16sf) __B,
2770288943Sdim                                                   -(__v16sf) __C,
2771288943Sdim                                                   (__mmask16) __U,
2772288943Sdim                                                   _MM_FROUND_CUR_DIRECTION);
2773288943Sdim}
2774288943Sdim
2775341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2776288943Sdim_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2777288943Sdim{
2778288943Sdim  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2779288943Sdim                                                    (__v16sf) __B,
2780288943Sdim                                                    -(__v16sf) __C,
2781288943Sdim                                                    (__mmask16) __U,
2782288943Sdim                                                    _MM_FROUND_CUR_DIRECTION);
2783288943Sdim}
2784288943Sdim
2785341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2786277325Sdim_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2787277325Sdim{
2788341825Sdim  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2789341825Sdim                                                   -(__v16sf) __B,
2790288943Sdim                                                   (__v16sf) __C,
2791288943Sdim                                                   (__mmask16) -1,
2792288943Sdim                                                   _MM_FROUND_CUR_DIRECTION);
2793277325Sdim}
2794277325Sdim
2795341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2796288943Sdim_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2797288943Sdim{
2798288943Sdim  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2799288943Sdim                                                    (__v16sf) __B,
2800288943Sdim                                                    (__v16sf) __C,
2801288943Sdim                                                    (__mmask16) __U,
2802288943Sdim                                                    _MM_FROUND_CUR_DIRECTION);
2803288943Sdim}
2804288943Sdim
2805341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2806288943Sdim_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2807288943Sdim{
2808288943Sdim  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2809288943Sdim                                                    (__v16sf) __B,
2810288943Sdim                                                    (__v16sf) __C,
2811288943Sdim                                                    (__mmask16) __U,
2812288943Sdim                                                    _MM_FROUND_CUR_DIRECTION);
2813288943Sdim}
2814288943Sdim
2815341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2816288943Sdim_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2817288943Sdim{
2818341825Sdim  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2819341825Sdim                                                   -(__v16sf) __B,
2820288943Sdim                                                   -(__v16sf) __C,
2821288943Sdim                                                   (__mmask16) -1,
2822288943Sdim                                                   _MM_FROUND_CUR_DIRECTION);
2823288943Sdim}
2824288943Sdim
2825341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2826288943Sdim_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2827288943Sdim{
2828288943Sdim  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2829288943Sdim                                                    (__v16sf) __B,
2830288943Sdim                                                    -(__v16sf) __C,
2831288943Sdim                                                    (__mmask16) __U,
2832288943Sdim                                                    _MM_FROUND_CUR_DIRECTION);
2833288943Sdim}
2834288943Sdim
/* fmaddsub on packed doubles with explicit rounding R: forwards A, B, C to
 * the vfmaddsubpd512 "mask" builtin with every mask bit set.  The expansion
 * is fully parenthesized so it composes safely in expressions. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))
2840288943Sdim
2841288943Sdim
/* Masked fmaddsub on packed doubles with explicit rounding R: forwards A, B,
 * C to the vfmaddsubpd512 "mask" builtin with U as the __mmask8 lane mask.
 * The expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
2847288943Sdim
2848288943Sdim
/* fmaddsub on packed doubles with explicit rounding R, using the
 * vfmaddsubpd512 "mask3" builtin with U as the __mmask8 lane mask.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
2854288943Sdim
2855288943Sdim
/* fmaddsub on packed doubles with explicit rounding R, using the
 * vfmaddsubpd512 "maskz" builtin with U as the __mmask8 lane mask.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
2861288943Sdim
2862288943Sdim
/* fmsubadd on packed doubles with explicit rounding R: implemented by
 * negating C and calling the vfmaddsubpd512 "mask" builtin with every mask
 * bit set.  The expansion is fully parenthesized so it composes safely. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))
2868288943Sdim
2869288943Sdim
/* Masked fmsubadd on packed doubles with explicit rounding R: negates C and
 * calls the vfmaddsubpd512 "mask" builtin with U as the __mmask8 lane mask.
 * The expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
2875288943Sdim
2876288943Sdim
/* fmsubadd on packed doubles with explicit rounding R: negates C and calls
 * the vfmaddsubpd512 "maskz" builtin with U as the __mmask8 lane mask.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
2882288943Sdim
2883288943Sdim
2884341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2885288943Sdim_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2886288943Sdim{
2887288943Sdim  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2888341825Sdim                                                      (__v8df) __B,
2889341825Sdim                                                      (__v8df) __C,
2890341825Sdim                                                      (__mmask8) -1,
2891341825Sdim                                                      _MM_FROUND_CUR_DIRECTION);
2892288943Sdim}
2893288943Sdim
2894341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2895288943Sdim_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2896288943Sdim{
2897288943Sdim  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2898341825Sdim                                                      (__v8df) __B,
2899341825Sdim                                                      (__v8df) __C,
2900341825Sdim                                                      (__mmask8) __U,
2901341825Sdim                                                      _MM_FROUND_CUR_DIRECTION);
2902288943Sdim}
2903288943Sdim
2904341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2905288943Sdim_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2906288943Sdim{
2907288943Sdim  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2908341825Sdim                                                       (__v8df) __B,
2909341825Sdim                                                       (__v8df) __C,
2910341825Sdim                                                       (__mmask8) __U,
2911341825Sdim                                                       _MM_FROUND_CUR_DIRECTION);
2912288943Sdim}
2913288943Sdim
2914341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2915288943Sdim_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2916288943Sdim{
2917288943Sdim  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2918341825Sdim                                                       (__v8df) __B,
2919341825Sdim                                                       (__v8df) __C,
2920341825Sdim                                                       (__mmask8) __U,
2921341825Sdim                                                       _MM_FROUND_CUR_DIRECTION);
2922288943Sdim}
2923288943Sdim
2924341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2925288943Sdim_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2926288943Sdim{
2927288943Sdim  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2928288943Sdim                                                       (__v8df) __B,
2929288943Sdim                                                       -(__v8df) __C,
2930288943Sdim                                                       (__mmask8) -1,
2931288943Sdim                                                       _MM_FROUND_CUR_DIRECTION);
2932288943Sdim}
2933288943Sdim
2934341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2935288943Sdim_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2936288943Sdim{
2937288943Sdim  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2938288943Sdim                                                       (__v8df) __B,
2939288943Sdim                                                       -(__v8df) __C,
2940288943Sdim                                                       (__mmask8) __U,
2941288943Sdim                                                       _MM_FROUND_CUR_DIRECTION);
2942288943Sdim}
2943288943Sdim
2944341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2945288943Sdim_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2946288943Sdim{
2947288943Sdim  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2948288943Sdim                                                        (__v8df) __B,
2949288943Sdim                                                        -(__v8df) __C,
2950288943Sdim                                                        (__mmask8) __U,
2951288943Sdim                                                        _MM_FROUND_CUR_DIRECTION);
2952288943Sdim}
2953288943Sdim
/* fmaddsub on packed floats with explicit rounding R: forwards A, B, C to
 * the vfmaddsubps512 "mask" builtin with every mask bit set.  The expansion
 * is fully parenthesized so it composes safely in expressions. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))
2959288943Sdim
2960288943Sdim
/* Masked fmaddsub on packed floats with explicit rounding R: forwards A, B,
 * C to the vfmaddsubps512 "mask" builtin with U as the __mmask16 lane mask.
 * The expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2966288943Sdim
2967288943Sdim
/* fmaddsub on packed floats with explicit rounding R, using the
 * vfmaddsubps512 "mask3" builtin with U as the __mmask16 lane mask.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2973288943Sdim
2974288943Sdim
/* fmaddsub on packed floats with explicit rounding R, using the
 * vfmaddsubps512 "maskz" builtin with U as the __mmask16 lane mask.  The
 * expansion is fully parenthesized so it composes safely in expressions. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2980288943Sdim
2981288943Sdim
/* fmsubadd on packed floats with explicit rounding R: implemented by
 * negating C and calling the vfmaddsubps512 "mask" builtin with every mask
 * bit set.  The expansion is fully parenthesized so it composes safely. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))
2987288943Sdim
2988288943Sdim
2989341825Sdim#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
2990309124Sdim  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2991309124Sdim                                             (__v16sf)(__m512)(B), \
2992309124Sdim                                             -(__v16sf)(__m512)(C), \
2993341825Sdim                                             (__mmask16)(U), (int)(R))
2994288943Sdim
2995288943Sdim
2996341825Sdim#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
2997309124Sdim  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
2998309124Sdim                                              (__v16sf)(__m512)(B), \
2999309124Sdim                                              -(__v16sf)(__m512)(C), \
3000341825Sdim                                              (__mmask16)(U), (int)(R))
3001288943Sdim
3002288943Sdim
3003341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3004288943Sdim_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3005288943Sdim{
3006288943Sdim  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3007288943Sdim                                                      (__v16sf) __B,
3008288943Sdim                                                      (__v16sf) __C,
3009288943Sdim                                                      (__mmask16) -1,
3010288943Sdim                                                      _MM_FROUND_CUR_DIRECTION);
3011288943Sdim}
3012288943Sdim
3013341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3014288943Sdim_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3015288943Sdim{
3016288943Sdim  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3017288943Sdim                                                      (__v16sf) __B,
3018288943Sdim                                                      (__v16sf) __C,
3019288943Sdim                                                      (__mmask16) __U,
3020288943Sdim                                                      _MM_FROUND_CUR_DIRECTION);
3021288943Sdim}
3022288943Sdim
3023341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3024288943Sdim_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3025288943Sdim{
3026288943Sdim  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3027288943Sdim                                                       (__v16sf) __B,
3028288943Sdim                                                       (__v16sf) __C,
3029288943Sdim                                                       (__mmask16) __U,
3030288943Sdim                                                       _MM_FROUND_CUR_DIRECTION);
3031288943Sdim}
3032288943Sdim
3033341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3034288943Sdim_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3035288943Sdim{
3036288943Sdim  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3037288943Sdim                                                       (__v16sf) __B,
3038288943Sdim                                                       (__v16sf) __C,
3039288943Sdim                                                       (__mmask16) __U,
3040288943Sdim                                                       _MM_FROUND_CUR_DIRECTION);
3041288943Sdim}
3042288943Sdim
3043341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3044288943Sdim_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3045288943Sdim{
3046288943Sdim  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3047288943Sdim                                                      (__v16sf) __B,
3048288943Sdim                                                      -(__v16sf) __C,
3049288943Sdim                                                      (__mmask16) -1,
3050288943Sdim                                                      _MM_FROUND_CUR_DIRECTION);
3051288943Sdim}
3052288943Sdim
3053341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3054288943Sdim_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3055288943Sdim{
3056288943Sdim  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3057288943Sdim                                                      (__v16sf) __B,
3058288943Sdim                                                      -(__v16sf) __C,
3059288943Sdim                                                      (__mmask16) __U,
3060288943Sdim                                                      _MM_FROUND_CUR_DIRECTION);
3061288943Sdim}
3062288943Sdim
3063341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3064288943Sdim_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3065288943Sdim{
3066288943Sdim  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3067288943Sdim                                                       (__v16sf) __B,
3068288943Sdim                                                       -(__v16sf) __C,
3069288943Sdim                                                       (__mmask16) __U,
3070288943Sdim                                                       _MM_FROUND_CUR_DIRECTION);
3071288943Sdim}
3072288943Sdim
/* Double-precision mask3 fmsub with explicit rounding argument R: casts A, B
 * and C to __v8df and calls the vfmsubpd512_mask3 builtin with mask U.
 * The outer parentheses keep the (__m512d) cast attached to the whole call
 * when the invocation is followed by a postfix operator or used with
 * sizeof. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
3078288943Sdim
3079288943Sdim
3080341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
3081288943Sdim_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3082288943Sdim{
3083341825Sdim  return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3084341825Sdim                                                    (__v8df) __B,
3085341825Sdim                                                    (__v8df) __C,
3086341825Sdim                                                    (__mmask8) __U,
3087341825Sdim                                                    _MM_FROUND_CUR_DIRECTION);
3088288943Sdim}
3089288943Sdim
/* Single-precision mask3 fmsub with explicit rounding argument R: casts A, B
 * and C to __v16sf and calls the vfmsubps512_mask3 builtin with mask U.
 * The outer parentheses keep the (__m512) cast attached to the whole call
 * when the invocation is followed by a postfix operator or used with
 * sizeof. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
3095288943Sdim
3096341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3097288943Sdim_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3098288943Sdim{
3099341825Sdim  return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3100341825Sdim                                                   (__v16sf) __B,
3101341825Sdim                                                   (__v16sf) __C,
3102341825Sdim                                                   (__mmask16) __U,
3103341825Sdim                                                   _MM_FROUND_CUR_DIRECTION);
3104288943Sdim}
3105288943Sdim
/* Double-precision mask3 fmsubadd with explicit rounding argument R: casts
 * A, B and C to __v8df and calls the vfmsubaddpd512_mask3 builtin with
 * mask U.  The outer parentheses keep the (__m512d) cast attached to the
 * whole call when the invocation is followed by a postfix operator or used
 * with sizeof. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
3111288943Sdim
3112288943Sdim
3113341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
3114288943Sdim_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3115288943Sdim{
3116341825Sdim  return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3117341825Sdim                                                       (__v8df) __B,
3118341825Sdim                                                       (__v8df) __C,
3119341825Sdim                                                       (__mmask8) __U,
3120341825Sdim                                                       _MM_FROUND_CUR_DIRECTION);
3121288943Sdim}
3122288943Sdim
/* Single-precision mask3 fmsubadd with explicit rounding argument R: casts
 * A, B and C to __v16sf and calls the vfmsubaddps512_mask3 builtin with
 * mask U.  The outer parentheses keep the (__m512) cast attached to the
 * whole call when the invocation is followed by a postfix operator or used
 * with sizeof. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
3128288943Sdim
3129288943Sdim
3130341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3131288943Sdim_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3132288943Sdim{
3133341825Sdim  return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3134341825Sdim                                                      (__v16sf) __B,
3135341825Sdim                                                      (__v16sf) __C,
3136341825Sdim                                                      (__mmask16) __U,
3137341825Sdim                                                      _MM_FROUND_CUR_DIRECTION);
3138288943Sdim}
3139288943Sdim
/* Double-precision masked fnmadd with explicit rounding argument R.
 * Built on the vfmaddpd512_mask builtin by negating the second operand B.
 * The outer parentheses keep the (__m512d) cast attached to the whole call
 * when the invocation is followed by a postfix operator or used with
 * sizeof. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))
3145288943Sdim
3146288943Sdim
3147341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
3148288943Sdim_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3149288943Sdim{
3150341825Sdim  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3151341825Sdim                                                    -(__v8df) __B,
3152341825Sdim                                                    (__v8df) __C,
3153341825Sdim                                                    (__mmask8) __U,
3154341825Sdim                                                    _MM_FROUND_CUR_DIRECTION);
3155288943Sdim}
3156288943Sdim
/* Single-precision masked fnmadd with explicit rounding argument R.
 * Built on the vfmaddps512_mask builtin by negating the second operand B.
 * The outer parentheses keep the (__m512) cast attached to the whole call
 * when the invocation is followed by a postfix operator or used with
 * sizeof. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))
3162288943Sdim
3163288943Sdim
3164341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3165288943Sdim_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3166288943Sdim{
3167341825Sdim  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3168341825Sdim                                                   -(__v16sf) __B,
3169341825Sdim                                                   (__v16sf) __C,
3170341825Sdim                                                   (__mmask16) __U,
3171341825Sdim                                                   _MM_FROUND_CUR_DIRECTION);
3172288943Sdim}
3173288943Sdim
/* Double-precision fnmsub with explicit rounding argument R.
 *   - mask form:  vfmaddpd512_mask with both B and C negated.
 *   - mask3 form: vfmsubpd512_mask3 with A negated.
 * The outer parentheses keep the (__m512d) cast attached to the whole call
 * when the invocation is followed by a postfix operator or used with
 * sizeof. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
3186288943Sdim
3187288943Sdim
3188341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
3189288943Sdim_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3190288943Sdim{
3191341825Sdim  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3192341825Sdim                                                    -(__v8df) __B,
3193341825Sdim                                                    -(__v8df) __C,
3194341825Sdim                                                    (__mmask8) __U,
3195341825Sdim                                                    _MM_FROUND_CUR_DIRECTION);
3196341825Sdim}
3197341825Sdim
3198341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
3199341825Sdim_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3200341825Sdim{
3201341825Sdim  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
3202288943Sdim                                                     (__v8df) __B,
3203288943Sdim                                                     (__v8df) __C,
3204288943Sdim                                                     (__mmask8) __U,
3205288943Sdim                                                     _MM_FROUND_CUR_DIRECTION);
3206288943Sdim}
3207288943Sdim
/* Single-precision fnmsub with explicit rounding argument R.
 *   - mask form:  vfmaddps512_mask with both B and C negated.
 *   - mask3 form: vfmsubps512_mask3 with A negated.
 * The outer parentheses keep the (__m512) cast attached to the whole call
 * when the invocation is followed by a postfix operator or used with
 * sizeof. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
3220288943Sdim
3221288943Sdim
3222341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3223288943Sdim_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3224288943Sdim{
3225341825Sdim  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3226341825Sdim                                                   -(__v16sf) __B,
3227341825Sdim                                                   -(__v16sf) __C,
3228341825Sdim                                                   (__mmask16) __U,
3229341825Sdim                                                   _MM_FROUND_CUR_DIRECTION);
3230341825Sdim}
3231341825Sdim
3232341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3233341825Sdim_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3234341825Sdim{
3235341825Sdim  return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
3236288943Sdim                                                    (__v16sf) __B,
3237288943Sdim                                                    (__v16sf) __C,
3238288943Sdim                                                    (__mmask16) __U,
3239288943Sdim                                                    _MM_FROUND_CUR_DIRECTION);
3240288943Sdim}
3241288943Sdim
3242288943Sdim
3243288943Sdim
3244277325Sdim/* Vector permutations */
3245277325Sdim
3246341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
3247277325Sdim_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3248277325Sdim{
3249341825Sdim  return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3250341825Sdim                                                (__v16si) __B);
3251277325Sdim}
3252309124Sdim
3253341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
3254341825Sdim_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3255341825Sdim                               __m512i __B)
3256309124Sdim{
3257341825Sdim  return (__m512i)__builtin_ia32_selectd_512(__U,
3258341825Sdim                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3259341825Sdim                              (__v16si)__A);
3260309124Sdim}
3261309124Sdim
3262341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
3263341825Sdim_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
3264341825Sdim                                __m512i __B)
3265309124Sdim{
3266341825Sdim  return (__m512i)__builtin_ia32_selectd_512(__U,
3267341825Sdim                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3268341825Sdim                              (__v16si)__I);
3269309124Sdim}
3270309124Sdim
3271341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
3272341825Sdim_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3273341825Sdim                                __m512i __B)
3274341825Sdim{
3275341825Sdim  return (__m512i)__builtin_ia32_selectd_512(__U,
3276341825Sdim                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3277341825Sdim                              (__v16si)_mm512_setzero_si512());
3278341825Sdim}
3279341825Sdim
3280341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
3281277325Sdim_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3282277325Sdim{
3283341825Sdim  return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3284341825Sdim                                                (__v8di) __B);
3285277325Sdim}
3286277325Sdim
3287341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
3288341825Sdim_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3289341825Sdim                               __m512i __B)
3290341825Sdim{
3291341825Sdim  return (__m512i)__builtin_ia32_selectq_512(__U,
3292341825Sdim                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3293341825Sdim                               (__v8di)__A);
3294341825Sdim}
3295341825Sdim
3296341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
3297341825Sdim_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
3298309124Sdim                                __m512i __B)
3299277325Sdim{
3300341825Sdim  return (__m512i)__builtin_ia32_selectq_512(__U,
3301341825Sdim                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3302341825Sdim                               (__v8di)__I);
3303277325Sdim}
3304309124Sdim
3305341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
3306341825Sdim_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3307341825Sdim                                __m512i __B)
3308277325Sdim{
3309341825Sdim  return (__m512i)__builtin_ia32_selectq_512(__U,
3310341825Sdim                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3311341825Sdim                               (__v8di)_mm512_setzero_si512());
3312277325Sdim}
3313277325Sdim
/* alignr on 64-bit elements.  The base macro calls the alignq512 builtin with
 * A, B (as __v8di) and the int-cast immediate I.  The mask/maskz forms feed
 * that result, followed by W or an all-zero vector respectively, to
 * __builtin_ia32_selectq_512 under mask U.
 *
 * Each expansion is enclosed in outer parentheses so the (__m512i) cast
 * covers the whole builtin call even when the invocation is followed by a
 * postfix operator or used as a sizeof operand. */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                  (__v8di)(__m512i)(W)))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512()))
3327309124Sdim
/* alignr on 32-bit elements.  The base macro calls the alignd512 builtin with
 * A, B (as __v16si) and the int-cast immediate I.  The mask/maskz forms feed
 * that result, followed by W or an all-zero vector respectively, to
 * __builtin_ia32_selectd_512 under mask U.
 *
 * Each expansion is enclosed in outer parentheses so the (__m512i) cast
 * covers the whole builtin call even when the invocation is followed by a
 * postfix operator or used as a sizeof operand. */
#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                 (__v16si)(__m512i)(W)))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()))
3341288943Sdim/* Vector Extract */
3342288943Sdim
/* Extract a 256-bit __m256d from a 512-bit double vector via the
 * extractf64x4_mask builtin.  The immediate (I / imm) is cast to int; the
 * third builtin argument is _mm256_undefined_pd() for the unmasked form, W
 * for the mask form and _mm256_setzero_pd() for the maskz form.
 *
 * Each expansion is enclosed in outer parentheses: without them a subscript
 * written after the invocation would bind to the builtin call before the
 * (__m256d) cast is applied. */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                             (__v4df)_mm256_undefined_pd(), \
                                             (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))
3357309124Sdim
/* Extract a 128-bit __m128 from a 512-bit float vector via the
 * extractf32x4_mask builtin.  The immediate (I / imm) is cast to int; the
 * third builtin argument is _mm_undefined_ps() for the unmasked form, W for
 * the mask form and _mm_setzero_ps() for the maskz form.
 *
 * Each expansion is enclosed in outer parentheses: without them a subscript
 * written after the invocation would bind to the builtin call before the
 * (__m128) cast is applied. */
#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v4sf)_mm_undefined_ps(), \
                                            (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
3372314564Sdim
3373277325Sdim/* Vector Blend */
3374277325Sdim
3375341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
3376277325Sdim_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3377277325Sdim{
3378309124Sdim  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3379277325Sdim                 (__v8df) __W,
3380309124Sdim                 (__v8df) __A);
3381277325Sdim}
3382277325Sdim
3383341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
3384277325Sdim_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3385277325Sdim{
3386309124Sdim  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3387277325Sdim                (__v16sf) __W,
3388309124Sdim                (__v16sf) __A);
3389277325Sdim}
3390277325Sdim
3391341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
3392277325Sdim_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3393277325Sdim{
3394309124Sdim  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3395277325Sdim                (__v8di) __W,
3396309124Sdim                (__v8di) __A);
3397277325Sdim}
3398277325Sdim
3399341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
3400277325Sdim_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3401277325Sdim{
3402309124Sdim  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3403277325Sdim                (__v16si) __W,
3404309124Sdim                (__v16si) __A);
3405277325Sdim}
3406277325Sdim
3407277325Sdim/* Compare */
3408277325Sdim
/* Packed single-precision compares yielding a __mmask16.
 *
 * The two *_round_* macros are the primitives: they call the cmpps512_mask
 * builtin with A and B (as __v16sf), the int-cast predicate P, a mask
 * (all-ones for the unmasked form, U otherwise) and the int-cast rounding
 * argument R.  Their expansions are enclosed in outer parentheses so that
 * the (__mmask16) cast covers the whole call; e.g. `sizeof MACRO(...)` would
 * otherwise parse as `sizeof(__mmask16)` followed by stray tokens.
 *
 * Every other macro in this family forwards to those primitives and so
 * inherits the parenthesized expansion. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)(U), (int)(R)))

/* Same compares with _MM_FROUND_CUR_DIRECTION supplied as R. */
#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Convenience wrappers, each fixing the predicate P to one _CMP_* constant. */
#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
3463314564Sdim
/* Packed double-precision compares yielding a __mmask8.
 *
 * The two *_round_* macros are the primitives: they call the cmppd512_mask
 * builtin with A and B (as __v8df), the int-cast predicate P, a mask
 * (all-ones for the unmasked form, U otherwise) and the int-cast rounding
 * argument R.  Their expansions are enclosed in outer parentheses so that
 * the (__mmask8) cast covers the whole call; e.g. `sizeof MACRO(...)` would
 * otherwise parse as `sizeof(__mmask8)` followed by stray tokens.
 *
 * Every other macro in this family forwards to those primitives and so
 * inherits the parenthesized expansion. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)(U), (int)(R)))

/* Same compares with _MM_FROUND_CUR_DIRECTION supplied as R. */
#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Convenience wrappers, each fixing the predicate P to one _CMP_* constant. */
#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3518314564Sdim
3519277325Sdim/* Conversion */
3520277325Sdim
/* float -> 32-bit integer conversion through the cvttps2udq512_mask builtin,
 * with an explicit int-cast argument R passed last.  The second builtin
 * argument is _mm512_undefined_epi32() for the unmasked form, W for the mask
 * form and _mm512_setzero_si512() for the maskz form.
 *
 * Each expansion is enclosed in outer parentheses so the (__m512i) cast
 * covers the whole builtin call even when the invocation is followed by a
 * postfix operator or used as a sizeof operand. */
#define _mm512_cvtt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_undefined_epi32(), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)(__m512i)(W), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_setzero_si512(), \
                                              (__mmask16)(U), (int)(R)))
3535309124Sdim
3536309124Sdim
3537341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
3538277325Sdim_mm512_cvttps_epu32(__m512 __A)
3539277325Sdim{
3540277325Sdim  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3541277325Sdim                  (__v16si)
3542277325Sdim                  _mm512_setzero_si512 (),
3543277325Sdim                  (__mmask16) -1,
3544277325Sdim                  _MM_FROUND_CUR_DIRECTION);
3545277325Sdim}
3546277325Sdim
3547341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
3548309124Sdim_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3549309124Sdim{
3550309124Sdim  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3551309124Sdim                   (__v16si) __W,
3552309124Sdim                   (__mmask16) __U,
3553309124Sdim                   _MM_FROUND_CUR_DIRECTION);
3554309124Sdim}
3555309124Sdim
3556341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
3557309124Sdim_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3558309124Sdim{
3559309124Sdim  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3560309124Sdim                   (__v16si) _mm512_setzero_si512 (),
3561309124Sdim                   (__mmask16) __U,
3562309124Sdim                   _MM_FROUND_CUR_DIRECTION);
3563309124Sdim}
3564309124Sdim
/* Conversion of 16 packed signed 32-bit integers to floats with an explicit
 * rounding immediate R.
 *   plain : all-ones mask, zero passthrough
 *   mask  : W is the passthrough vector, U the write mask
 *   maskz : zero passthrough, U the write mask
 * The outer parentheses keep the result cast attached to the whole call when
 * the macro is used inside a larger expression. */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
3579309124Sdim
/* Conversion of 16 packed unsigned 32-bit integers to floats with an explicit
 * rounding immediate R.
 *   plain : all-ones mask, zero passthrough
 *   mask  : W is the passthrough vector, U the write mask
 *   maskz : zero passthrough, U the write mask
 * The outer parentheses keep the result cast attached to the whole call when
 * the macro is used inside a larger expression. */
#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3594309124Sdim
3595341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3596309124Sdim_mm512_cvtepu32_ps (__m512i __A)
3597309124Sdim{
3598341825Sdim  return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3599309124Sdim}
3600309124Sdim
3601341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3602309124Sdim_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3603309124Sdim{
3604341825Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3605341825Sdim                                             (__v16sf)_mm512_cvtepu32_ps(__A),
3606341825Sdim                                             (__v16sf)__W);
3607309124Sdim}
3608309124Sdim
3609341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3610309124Sdim_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3611309124Sdim{
3612341825Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3613341825Sdim                                             (__v16sf)_mm512_cvtepu32_ps(__A),
3614341825Sdim                                             (__v16sf)_mm512_setzero_ps());
3615309124Sdim}
3616309124Sdim
3617341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
3618277325Sdim_mm512_cvtepi32_pd(__m256i __A)
3619277325Sdim{
3620314564Sdim  return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3621277325Sdim}
3622277325Sdim
3623341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
3624309124Sdim_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3625309124Sdim{
3626314564Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3627314564Sdim                                              (__v8df)_mm512_cvtepi32_pd(__A),
3628314564Sdim                                              (__v8df)__W);
3629309124Sdim}
3630309124Sdim
3631341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
3632309124Sdim_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3633309124Sdim{
3634314564Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3635314564Sdim                                              (__v8df)_mm512_cvtepi32_pd(__A),
3636314564Sdim                                              (__v8df)_mm512_setzero_pd());
3637309124Sdim}
3638309124Sdim
3639341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
3640314564Sdim_mm512_cvtepi32lo_pd(__m512i __A)
3641314564Sdim{
3642314564Sdim  return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3643314564Sdim}
3644314564Sdim
3645341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
3646314564Sdim_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3647314564Sdim{
3648314564Sdim  return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3649314564Sdim}
3650314564Sdim
3651341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3652309124Sdim_mm512_cvtepi32_ps (__m512i __A)
3653309124Sdim{
3654341825Sdim  return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3655309124Sdim}
3656309124Sdim
3657341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3658309124Sdim_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3659309124Sdim{
3660341825Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3661341825Sdim                                             (__v16sf)_mm512_cvtepi32_ps(__A),
3662341825Sdim                                             (__v16sf)__W);
3663309124Sdim}
3664309124Sdim
3665341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3666309124Sdim_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3667309124Sdim{
3668341825Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3669341825Sdim                                             (__v16sf)_mm512_cvtepi32_ps(__A),
3670341825Sdim                                             (__v16sf)_mm512_setzero_ps());
3671309124Sdim}
3672309124Sdim
3673341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
3674277325Sdim_mm512_cvtepu32_pd(__m256i __A)
3675277325Sdim{
3676314564Sdim  return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3677277325Sdim}
3678277325Sdim
3679341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
3680309124Sdim_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3681309124Sdim{
3682314564Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3683314564Sdim                                              (__v8df)_mm512_cvtepu32_pd(__A),
3684314564Sdim                                              (__v8df)__W);
3685309124Sdim}
3686309124Sdim
3687341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
3688309124Sdim_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3689309124Sdim{
3690314564Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3691314564Sdim                                              (__v8df)_mm512_cvtepu32_pd(__A),
3692314564Sdim                                              (__v8df)_mm512_setzero_pd());
3693309124Sdim}
3694309124Sdim
3695341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
3696314564Sdim_mm512_cvtepu32lo_pd(__m512i __A)
3697314564Sdim{
3698314564Sdim  return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3699314564Sdim}
3700314564Sdim
3701341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
3702314564Sdim_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3703314564Sdim{
3704314564Sdim  return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3705314564Sdim}
3706314564Sdim
/* Narrowing conversion of 8 packed doubles to 8 floats (256-bit result) with
 * an explicit rounding immediate R.
 *   plain : all-ones mask, zero passthrough
 *   mask  : W is the passthrough vector, U the write mask
 *   maskz : zero passthrough, U the write mask
 * The outer parentheses keep the result cast attached to the whole call when
 * the macro is used inside a larger expression. */
#define _mm512_cvt_roundpd_ps(A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
3721309124Sdim
3722341825Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS512
3723309124Sdim_mm512_cvtpd_ps (__m512d __A)
3724309124Sdim{
3725309124Sdim  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3726309124Sdim                (__v8sf) _mm256_undefined_ps (),
3727309124Sdim                (__mmask8) -1,
3728309124Sdim                _MM_FROUND_CUR_DIRECTION);
3729309124Sdim}
3730309124Sdim
3731341825Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS512
3732309124Sdim_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3733309124Sdim{
3734309124Sdim  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3735309124Sdim                (__v8sf) __W,
3736309124Sdim                (__mmask8) __U,
3737309124Sdim                _MM_FROUND_CUR_DIRECTION);
3738309124Sdim}
3739309124Sdim
3740341825Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS512
3741309124Sdim_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
3742309124Sdim{
3743309124Sdim  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3744309124Sdim                (__v8sf) _mm256_setzero_ps (),
3745309124Sdim                (__mmask8) __U,
3746309124Sdim                _MM_FROUND_CUR_DIRECTION);
3747309124Sdim}
3748309124Sdim
3749341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3750314564Sdim_mm512_cvtpd_pslo (__m512d __A)
3751314564Sdim{
3752314564Sdim  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3753314564Sdim                (__v8sf) _mm256_setzero_ps (),
3754314564Sdim                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3755314564Sdim}
3756314564Sdim
3757341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3758314564Sdim_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3759314564Sdim{
3760314564Sdim  return (__m512) __builtin_shufflevector (
3761314564Sdim                (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
3762314564Sdim                                               __U, __A),
3763314564Sdim                (__v8sf) _mm256_setzero_ps (),
3764314564Sdim                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3765314564Sdim}
3766314564Sdim
/* Conversion of 16 packed floats to 16 half-precision values (256-bit
 * result); I is the immediate forwarded to the builtin.
 *   plain : all-ones mask, undefined passthrough
 *   mask  : W is the passthrough vector, U the write mask
 *   maskz : zero passthrough, U the write mask
 * Parameter names follow the W = passthrough / U = mask convention used by
 * the other masked macros in this header; argument positions are unchanged.
 * The outer parentheses keep the result cast attached to the whole call when
 * the macro is used inside a larger expression. */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_undefined_si256(), \
                                             (__mmask16)-1))

#define _mm512_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)(__m256i)(W), \
                                             (__mmask16)(U)))

#define _mm512_maskz_cvt_roundps_ph(U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(U)))
3781309124Sdim
/* The cvtps_ph names are plain aliases of the cvt_roundps_ph macros, so they
 * accept exactly the same argument lists. */
#define _mm512_cvtps_ph _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
3785288943Sdim
/* Conversion of 16 packed half-precision values (256-bit input) to floats
 * with an explicit rounding/SAE immediate R.
 *   plain : all-ones mask, undefined passthrough
 *   mask  : W is the passthrough vector, U the write mask
 *   maskz : zero passthrough, U the write mask
 * The outer parentheses keep the result cast attached to the whole call when
 * the macro is used inside a larger expression. */
#define _mm512_cvt_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3800309124Sdim
3801309124Sdim
3802341825Sdimstatic  __inline __m512 __DEFAULT_FN_ATTRS512
3803277325Sdim_mm512_cvtph_ps(__m256i __A)
3804277325Sdim{
3805277325Sdim  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3806277325Sdim                (__v16sf)
3807277325Sdim                _mm512_setzero_ps (),
3808277325Sdim                (__mmask16) -1,
3809277325Sdim                _MM_FROUND_CUR_DIRECTION);
3810277325Sdim}
3811277325Sdim
3812341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3813309124Sdim_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3814277325Sdim{
3815309124Sdim  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3816309124Sdim                 (__v16sf) __W,
3817309124Sdim                 (__mmask16) __U,
3818309124Sdim                 _MM_FROUND_CUR_DIRECTION);
3819277325Sdim}
3820277325Sdim
3821341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
3822309124Sdim_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
3823309124Sdim{
3824309124Sdim  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3825309124Sdim                 (__v16sf) _mm512_setzero_ps (),
3826309124Sdim                 (__mmask16) __U,
3827309124Sdim                 _MM_FROUND_CUR_DIRECTION);
3828309124Sdim}
3829309124Sdim
/* Truncating conversion of 8 packed doubles to signed 32-bit integers
 * (256-bit result) with an explicit rounding/SAE immediate R.
 *   plain : all-ones mask, zero passthrough
 *   mask  : W is the passthrough vector, U the write mask
 *   maskz : zero passthrough, U the write mask
 * The outer parentheses keep the result cast attached to the whole call when
 * the macro is used inside a larger expression. */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
3844309124Sdim
3845341825Sdimstatic __inline __m256i __DEFAULT_FN_ATTRS512
3846296417Sdim_mm512_cvttpd_epi32(__m512d __a)
3847277325Sdim{
3848296417Sdim  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3849277325Sdim                                                   (__v8si)_mm256_setzero_si256(),
3850277325Sdim                                                   (__mmask8) -1,
3851277325Sdim                                                    _MM_FROUND_CUR_DIRECTION);
3852277325Sdim}
3853277325Sdim
3854341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
3855309124Sdim_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3856309124Sdim{
3857309124Sdim  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3858309124Sdim                  (__v8si) __W,
3859309124Sdim                  (__mmask8) __U,
3860309124Sdim                  _MM_FROUND_CUR_DIRECTION);
3861309124Sdim}
3862277325Sdim
3863341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
3864309124Sdim_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
3865309124Sdim{
3866309124Sdim  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3867309124Sdim                  (__v8si) _mm256_setzero_si256 (),
3868309124Sdim                  (__mmask8) __U,
3869309124Sdim                  _MM_FROUND_CUR_DIRECTION);
3870309124Sdim}
3871309124Sdim
/* Truncating conversion of 16 packed floats to signed 32-bit integers with
 * an explicit rounding/SAE immediate R.
 *   plain : all-ones mask, zero passthrough
 *   mask  : W is the passthrough vector, U the write mask
 *   maskz : zero passthrough, U the write mask
 * The outer parentheses keep the result cast attached to the whole call when
 * the macro is used inside a larger expression. */
#define _mm512_cvtt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
3886309124Sdim
3887341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
3888309124Sdim_mm512_cvttps_epi32(__m512 __a)
3889309124Sdim{
3890309124Sdim  return (__m512i)
3891309124Sdim    __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3892309124Sdim                                     (__v16si) _mm512_setzero_si512 (),
3893309124Sdim                                     (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
3894309124Sdim}
3895309124Sdim
3896341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
3897309124Sdim_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3898309124Sdim{
3899309124Sdim  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3900309124Sdim                  (__v16si) __W,
3901309124Sdim                  (__mmask16) __U,
3902309124Sdim                  _MM_FROUND_CUR_DIRECTION);
3903309124Sdim}
3904309124Sdim
3905341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
3906309124Sdim_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
3907309124Sdim{
3908309124Sdim  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3909309124Sdim                  (__v16si) _mm512_setzero_si512 (),
3910309124Sdim                  (__mmask16) __U,
3911309124Sdim                  _MM_FROUND_CUR_DIRECTION);
3912309124Sdim}
3913309124Sdim
/* Conversion of 16 packed floats to signed 32-bit integers with an explicit
 * rounding immediate R.
 *   plain : all-ones mask, zero passthrough
 *   mask  : W is the passthrough vector, U the write mask
 *   maskz : zero passthrough, U the write mask
 * The outer parentheses keep the result cast attached to the whole call when
 * the macro is used inside a larger expression. */
#define _mm512_cvt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)))
3928309124Sdim
3929341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
3930309124Sdim_mm512_cvtps_epi32 (__m512 __A)
3931309124Sdim{
3932309124Sdim  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3933309124Sdim                 (__v16si) _mm512_undefined_epi32 (),
3934309124Sdim                 (__mmask16) -1,
3935309124Sdim                 _MM_FROUND_CUR_DIRECTION);
3936309124Sdim}
3937309124Sdim
3938341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
3939309124Sdim_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3940309124Sdim{
3941309124Sdim  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3942309124Sdim                 (__v16si) __W,
3943309124Sdim                 (__mmask16) __U,
3944309124Sdim                 _MM_FROUND_CUR_DIRECTION);
3945309124Sdim}
3946309124Sdim
3947341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
3948309124Sdim_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
3949309124Sdim{
3950309124Sdim  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3951309124Sdim                 (__v16si)
3952309124Sdim                 _mm512_setzero_si512 (),
3953309124Sdim                 (__mmask16) __U,
3954309124Sdim                 _MM_FROUND_CUR_DIRECTION);
3955309124Sdim}
3956309124Sdim
/* Conversion of 8 packed doubles to signed 32-bit integers (256-bit result)
 * with an explicit rounding immediate R.
 *   plain : all-ones mask, zero passthrough
 *   mask  : W is the passthrough vector, U the write mask
 *   maskz : zero passthrough, U the write mask
 * The outer parentheses keep the result cast attached to the whole call when
 * the macro is used inside a larger expression. */
#define _mm512_cvt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)))
3971309124Sdim
3972341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
3973309124Sdim_mm512_cvtpd_epi32 (__m512d __A)
3974309124Sdim{
3975309124Sdim  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3976309124Sdim                 (__v8si)
3977309124Sdim                 _mm256_undefined_si256 (),
3978309124Sdim                 (__mmask8) -1,
3979309124Sdim                 _MM_FROUND_CUR_DIRECTION);
3980309124Sdim}
3981309124Sdim
3982341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
3983309124Sdim_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3984309124Sdim{
3985309124Sdim  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3986309124Sdim                 (__v8si) __W,
3987309124Sdim                 (__mmask8) __U,
3988309124Sdim                 _MM_FROUND_CUR_DIRECTION);
3989309124Sdim}
3990309124Sdim
3991341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
3992309124Sdim_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
3993309124Sdim{
3994309124Sdim  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3995309124Sdim                 (__v8si)
3996309124Sdim                 _mm256_setzero_si256 (),
3997309124Sdim                 (__mmask8) __U,
3998309124Sdim                 _MM_FROUND_CUR_DIRECTION);
3999309124Sdim}
4000309124Sdim
/* Conversion of 16 packed floats to unsigned 32-bit integers with an
 * explicit rounding immediate R.
 *   plain : all-ones mask, zero passthrough
 *   mask  : W is the passthrough vector, U the write mask
 *   maskz : zero passthrough, U the write mask
 * The outer parentheses keep the result cast attached to the whole call when
 * the macro is used inside a larger expression. */
#define _mm512_cvt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
4015309124Sdim
4016341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4017309124Sdim_mm512_cvtps_epu32 ( __m512 __A)
4018309124Sdim{
4019309124Sdim  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4020309124Sdim                  (__v16si)\
4021341825Sdim                  _mm512_undefined_epi32 (),
4022309124Sdim                  (__mmask16) -1,\
4023341825Sdim                  _MM_FROUND_CUR_DIRECTION);
4024309124Sdim}
4025309124Sdim
4026341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4027309124Sdim_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4028309124Sdim{
4029309124Sdim  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4030309124Sdim                  (__v16si) __W,
4031309124Sdim                  (__mmask16) __U,
4032309124Sdim                  _MM_FROUND_CUR_DIRECTION);
4033309124Sdim}
4034309124Sdim
4035341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4036309124Sdim_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4037309124Sdim{
4038309124Sdim  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4039322320Sdim                  (__v16si)
4040309124Sdim                  _mm512_setzero_si512 (),
4041309124Sdim                  (__mmask16) __U ,
4042309124Sdim                  _MM_FROUND_CUR_DIRECTION);
4043309124Sdim}
4044309124Sdim
/* Conversion of 8 packed doubles to unsigned 32-bit integers (256-bit
 * result) with an explicit rounding immediate R.
 *   plain : all-ones mask, zero passthrough
 *   mask  : W is the passthrough vector, U the write mask
 *   maskz : zero passthrough, U the write mask
 * The outer parentheses keep the result cast attached to the whole call when
 * the macro is used inside a larger expression. */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
4059309124Sdim
4060341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
4061309124Sdim_mm512_cvtpd_epu32 (__m512d __A)
4062309124Sdim{
4063309124Sdim  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4064309124Sdim                  (__v8si)
4065309124Sdim                  _mm256_undefined_si256 (),
4066309124Sdim                  (__mmask8) -1,
4067309124Sdim                  _MM_FROUND_CUR_DIRECTION);
4068309124Sdim}
4069309124Sdim
4070341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
4071309124Sdim_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4072309124Sdim{
4073309124Sdim  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4074309124Sdim                  (__v8si) __W,
4075309124Sdim                  (__mmask8) __U,
4076309124Sdim                  _MM_FROUND_CUR_DIRECTION);
4077309124Sdim}
4078309124Sdim
4079341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
4080309124Sdim_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4081309124Sdim{
4082309124Sdim  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4083309124Sdim                  (__v8si)
4084309124Sdim                  _mm256_setzero_si256 (),
4085309124Sdim                  (__mmask8) __U,
4086309124Sdim                  _MM_FROUND_CUR_DIRECTION);
4087309124Sdim}
4088309124Sdim
4089341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512
4090321369Sdim_mm512_cvtsd_f64(__m512d __a)
4091321369Sdim{
4092321369Sdim  return __a[0];
4093321369Sdim}
4094321369Sdim
4095341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512
4096321369Sdim_mm512_cvtss_f32(__m512 __a)
4097321369Sdim{
4098321369Sdim  return __a[0];
4099321369Sdim}
4100321369Sdim
4101277325Sdim/* Unpack and Interleave */
4102309124Sdim
4103341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
4104277325Sdim_mm512_unpackhi_pd(__m512d __a, __m512d __b)
4105277325Sdim{
4106309124Sdim  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4107309124Sdim                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4108277325Sdim}
4109277325Sdim
4110341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
4111309124Sdim_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4112309124Sdim{
4113309124Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4114309124Sdim                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
4115309124Sdim                                           (__v8df)__W);
4116309124Sdim}
4117309124Sdim
4118341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
4119309124Sdim_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4120309124Sdim{
4121309124Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4122309124Sdim                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
4123309124Sdim                                           (__v8df)_mm512_setzero_pd());
4124309124Sdim}
4125309124Sdim
4126341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
4127277325Sdim_mm512_unpacklo_pd(__m512d __a, __m512d __b)
4128277325Sdim{
4129309124Sdim  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4130309124Sdim                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4131277325Sdim}
4132277325Sdim
4133341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
4134309124Sdim_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4135309124Sdim{
4136309124Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4137309124Sdim                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
4138309124Sdim                                           (__v8df)__W);
4139309124Sdim}
4140309124Sdim
4141341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
4142309124Sdim_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4143309124Sdim{
4144309124Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4145309124Sdim                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
4146309124Sdim                                           (__v8df)_mm512_setzero_pd());
4147309124Sdim}
4148309124Sdim
4149341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
4150277325Sdim_mm512_unpackhi_ps(__m512 __a, __m512 __b)
4151277325Sdim{
4152309124Sdim  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4153309124Sdim                                         2,    18,    3,    19,
4154309124Sdim                                         2+4,  18+4,  3+4,  19+4,
4155309124Sdim                                         2+8,  18+8,  3+8,  19+8,
4156309124Sdim                                         2+12, 18+12, 3+12, 19+12);
4157277325Sdim}
4158277325Sdim
4159341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
4160309124Sdim_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4161309124Sdim{
4162309124Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4163309124Sdim                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
4164309124Sdim                                          (__v16sf)__W);
4165309124Sdim}
4166309124Sdim
4167341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
4168309124Sdim_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4169309124Sdim{
4170309124Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4171309124Sdim                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
4172309124Sdim                                          (__v16sf)_mm512_setzero_ps());
4173309124Sdim}
4174309124Sdim
4175341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
4176277325Sdim_mm512_unpacklo_ps(__m512 __a, __m512 __b)
4177277325Sdim{
4178309124Sdim  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4179309124Sdim                                         0,    16,    1,    17,
4180309124Sdim                                         0+4,  16+4,  1+4,  17+4,
4181309124Sdim                                         0+8,  16+8,  1+8,  17+8,
4182309124Sdim                                         0+12, 16+12, 1+12, 17+12);
4183277325Sdim}
4184277325Sdim
4185341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
4186309124Sdim_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4187309124Sdim{
4188309124Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4189309124Sdim                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
4190309124Sdim                                          (__v16sf)__W);
4191309124Sdim}
4192309124Sdim
4193341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
4194309124Sdim_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4195309124Sdim{
4196309124Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4197309124Sdim                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
4198309124Sdim                                          (__v16sf)_mm512_setzero_ps());
4199309124Sdim}
4200309124Sdim
4201341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4202309124Sdim_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4203309124Sdim{
4204309124Sdim  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4205309124Sdim                                          2,    18,    3,    19,
4206309124Sdim                                          2+4,  18+4,  3+4,  19+4,
4207309124Sdim                                          2+8,  18+8,  3+8,  19+8,
4208309124Sdim                                          2+12, 18+12, 3+12, 19+12);
4209309124Sdim}
4210309124Sdim
4211341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4212309124Sdim_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4213309124Sdim{
4214309124Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4215309124Sdim                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
4216309124Sdim                                       (__v16si)__W);
4217309124Sdim}
4218309124Sdim
4219341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4220309124Sdim_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4221309124Sdim{
4222309124Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4223309124Sdim                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
4224309124Sdim                                       (__v16si)_mm512_setzero_si512());
4225309124Sdim}
4226309124Sdim
4227341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4228309124Sdim_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4229309124Sdim{
4230309124Sdim  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4231309124Sdim                                          0,    16,    1,    17,
4232309124Sdim                                          0+4,  16+4,  1+4,  17+4,
4233309124Sdim                                          0+8,  16+8,  1+8,  17+8,
4234309124Sdim                                          0+12, 16+12, 1+12, 17+12);
4235309124Sdim}
4236309124Sdim
4237341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4238309124Sdim_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4239309124Sdim{
4240309124Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4241309124Sdim                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
4242309124Sdim                                       (__v16si)__W);
4243309124Sdim}
4244309124Sdim
4245341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4246309124Sdim_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4247309124Sdim{
4248309124Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4249309124Sdim                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
4250309124Sdim                                       (__v16si)_mm512_setzero_si512());
4251309124Sdim}
4252309124Sdim
4253341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4254309124Sdim_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4255309124Sdim{
4256309124Sdim  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4257309124Sdim                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4258309124Sdim}
4259309124Sdim
4260341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4261309124Sdim_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4262309124Sdim{
4263309124Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4264309124Sdim                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
4265309124Sdim                                        (__v8di)__W);
4266309124Sdim}
4267309124Sdim
4268341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4269309124Sdim_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4270309124Sdim{
4271309124Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4272309124Sdim                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
4273309124Sdim                                        (__v8di)_mm512_setzero_si512());
4274309124Sdim}
4275309124Sdim
4276341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4277309124Sdim_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4278309124Sdim{
4279309124Sdim  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4280309124Sdim                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4281309124Sdim}
4282309124Sdim
4283341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4284309124Sdim_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4285309124Sdim{
4286309124Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4287309124Sdim                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
4288309124Sdim                                        (__v8di)__W);
4289309124Sdim}
4290309124Sdim
4291341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4292309124Sdim_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4293309124Sdim{
4294309124Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4295309124Sdim                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
4296309124Sdim                                        (__v8di)_mm512_setzero_si512());
4297309124Sdim}
4298309124Sdim
4299277325Sdim
4300277325Sdim/* SIMD load ops */
4301277325Sdim
4302341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
4303309124Sdim_mm512_loadu_si512 (void const *__P)
4304309124Sdim{
4305341825Sdim  struct __loadu_si512 {
4306353358Sdim    __m512i_u __v;
4307341825Sdim  } __attribute__((__packed__, __may_alias__));
4308360784Sdim  return ((const struct __loadu_si512*)__P)->__v;
4309309124Sdim}
4310309124Sdim
4311341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
4312344779Sdim_mm512_loadu_epi32 (void const *__P)
4313344779Sdim{
4314344779Sdim  struct __loadu_epi32 {
4315353358Sdim    __m512i_u __v;
4316344779Sdim  } __attribute__((__packed__, __may_alias__));
4317360784Sdim  return ((const struct __loadu_epi32*)__P)->__v;
4318344779Sdim}
4319344779Sdim
4320344779Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
4321309124Sdim_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4322309124Sdim{
4323309124Sdim  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4324309124Sdim                  (__v16si) __W,
4325309124Sdim                  (__mmask16) __U);
4326309124Sdim}
4327309124Sdim
4328309124Sdim
4329341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
4330277325Sdim_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
4331277325Sdim{
4332309124Sdim  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4333277325Sdim                                                     (__v16si)
4334277325Sdim                                                     _mm512_setzero_si512 (),
4335277325Sdim                                                     (__mmask16) __U);
4336277325Sdim}
4337277325Sdim
4338341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
4339344779Sdim_mm512_loadu_epi64 (void const *__P)
4340344779Sdim{
4341344779Sdim  struct __loadu_epi64 {
4342353358Sdim    __m512i_u __v;
4343344779Sdim  } __attribute__((__packed__, __may_alias__));
4344360784Sdim  return ((const struct __loadu_epi64*)__P)->__v;
4345344779Sdim}
4346344779Sdim
4347344779Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
4348309124Sdim_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4349309124Sdim{
4350309124Sdim  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4351309124Sdim                  (__v8di) __W,
4352309124Sdim                  (__mmask8) __U);
4353309124Sdim}
4354309124Sdim
4355341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
4356277325Sdim_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
4357277325Sdim{
4358309124Sdim  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4359277325Sdim                                                     (__v8di)
4360277325Sdim                                                     _mm512_setzero_si512 (),
4361277325Sdim                                                     (__mmask8) __U);
4362277325Sdim}
4363277325Sdim
4364341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
4365309124Sdim_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4366309124Sdim{
4367309124Sdim  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4368309124Sdim                   (__v16sf) __W,
4369309124Sdim                   (__mmask16) __U);
4370309124Sdim}
4371309124Sdim
4372341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
4373277325Sdim_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
4374277325Sdim{
4375309124Sdim  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4376277325Sdim                                                  (__v16sf)
4377277325Sdim                                                  _mm512_setzero_ps (),
4378277325Sdim                                                  (__mmask16) __U);
4379277325Sdim}
4380277325Sdim
4381341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
4382309124Sdim_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4383277325Sdim{
4384309124Sdim  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4385309124Sdim                (__v8df) __W,
4386309124Sdim                (__mmask8) __U);
4387277325Sdim}
4388277325Sdim
4389341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
4390309124Sdim_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
4391288943Sdim{
4392309124Sdim  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4393288943Sdim                                                   (__v8df)
4394288943Sdim                                                   _mm512_setzero_pd (),
4395288943Sdim                                                   (__mmask8) __U);
4396288943Sdim}
4397288943Sdim
4398341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
4399321369Sdim_mm512_loadu_pd(void const *__p)
4400277325Sdim{
4401277325Sdim  struct __loadu_pd {
4402353358Sdim    __m512d_u __v;
4403288943Sdim  } __attribute__((__packed__, __may_alias__));
4404360784Sdim  return ((const struct __loadu_pd*)__p)->__v;
4405277325Sdim}
4406277325Sdim
4407341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
4408321369Sdim_mm512_loadu_ps(void const *__p)
4409277325Sdim{
4410277325Sdim  struct __loadu_ps {
4411353358Sdim    __m512_u __v;
4412288943Sdim  } __attribute__((__packed__, __may_alias__));
4413360784Sdim  return ((const struct __loadu_ps*)__p)->__v;
4414277325Sdim}
4415277325Sdim
4416341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
4417321369Sdim_mm512_load_ps(void const *__p)
4418288943Sdim{
4419360784Sdim  return *(const __m512*)__p;
4420288943Sdim}
4421288943Sdim
4422341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
4423309124Sdim_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4424309124Sdim{
4425309124Sdim  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4426309124Sdim                   (__v16sf) __W,
4427309124Sdim                   (__mmask16) __U);
4428309124Sdim}
4429309124Sdim
4430341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
4431309124Sdim_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4432309124Sdim{
4433309124Sdim  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4434309124Sdim                                                  (__v16sf)
4435309124Sdim                                                  _mm512_setzero_ps (),
4436309124Sdim                                                  (__mmask16) __U);
4437309124Sdim}
4438309124Sdim
4439341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
4440321369Sdim_mm512_load_pd(void const *__p)
4441288943Sdim{
4442360784Sdim  return *(const __m512d*)__p;
4443288943Sdim}
4444288943Sdim
4445341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
4446309124Sdim_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4447309124Sdim{
4448309124Sdim  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4449309124Sdim                          (__v8df) __W,
4450309124Sdim                          (__mmask8) __U);
4451309124Sdim}
4452309124Sdim
4453341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
4454309124Sdim_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4455309124Sdim{
4456309124Sdim  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4457309124Sdim                                                   (__v8df)
4458309124Sdim                                                   _mm512_setzero_pd (),
4459309124Sdim                                                   (__mmask8) __U);
4460309124Sdim}
4461309124Sdim
4462341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
4463309124Sdim_mm512_load_si512 (void const *__P)
4464309124Sdim{
4465360784Sdim  return *(const __m512i *) __P;
4466309124Sdim}
4467309124Sdim
4468341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
4469309124Sdim_mm512_load_epi32 (void const *__P)
4470309124Sdim{
4471360784Sdim  return *(const __m512i *) __P;
4472309124Sdim}
4473309124Sdim
4474341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
4475309124Sdim_mm512_load_epi64 (void const *__P)
4476309124Sdim{
4477360784Sdim  return *(const __m512i *) __P;
4478309124Sdim}
4479309124Sdim
4480277325Sdim/* SIMD store ops */
4481277325Sdim
4482341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512
4483344779Sdim_mm512_storeu_epi64 (void *__P, __m512i __A)
4484344779Sdim{
4485344779Sdim  struct __storeu_epi64 {
4486353358Sdim    __m512i_u __v;
4487344779Sdim  } __attribute__((__packed__, __may_alias__));
4488344779Sdim  ((struct __storeu_epi64*)__P)->__v = __A;
4489344779Sdim}
4490344779Sdim
4491344779Sdimstatic __inline void __DEFAULT_FN_ATTRS512
4492277325Sdim_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4493277325Sdim{
4494309124Sdim  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4495277325Sdim                                     (__mmask8) __U);
4496277325Sdim}
4497277325Sdim
4498341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512
4499309124Sdim_mm512_storeu_si512 (void *__P, __m512i __A)
4500309124Sdim{
4501341825Sdim  struct __storeu_si512 {
4502353358Sdim    __m512i_u __v;
4503341825Sdim  } __attribute__((__packed__, __may_alias__));
4504341825Sdim  ((struct __storeu_si512*)__P)->__v = __A;
4505309124Sdim}
4506309124Sdim
4507341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512
4508344779Sdim_mm512_storeu_epi32 (void *__P, __m512i __A)
4509344779Sdim{
4510344779Sdim  struct __storeu_epi32 {
4511353358Sdim    __m512i_u __v;
4512344779Sdim  } __attribute__((__packed__, __may_alias__));
4513344779Sdim  ((struct __storeu_epi32*)__P)->__v = __A;
4514344779Sdim}
4515344779Sdim
4516344779Sdimstatic __inline void __DEFAULT_FN_ATTRS512
4517277325Sdim_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
4518277325Sdim{
4519309124Sdim  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4520277325Sdim                                     (__mmask16) __U);
4521277325Sdim}
4522277325Sdim
4523341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512
4524277325Sdim_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4525277325Sdim{
4526309124Sdim  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4527277325Sdim}
4528277325Sdim
4529341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512
4530277325Sdim_mm512_storeu_pd(void *__P, __m512d __A)
4531277325Sdim{
4532341825Sdim  struct __storeu_pd {
4533353358Sdim    __m512d_u __v;
4534341825Sdim  } __attribute__((__packed__, __may_alias__));
4535341825Sdim  ((struct __storeu_pd*)__P)->__v = __A;
4536277325Sdim}
4537277325Sdim
4538341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512
4539277325Sdim_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4540277325Sdim{
4541309124Sdim  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4542277325Sdim                                   (__mmask16) __U);
4543277325Sdim}
4544277325Sdim
4545341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512
4546277325Sdim_mm512_storeu_ps(void *__P, __m512 __A)
4547277325Sdim{
4548341825Sdim  struct __storeu_ps {
4549353358Sdim    __m512_u __v;
4550341825Sdim  } __attribute__((__packed__, __may_alias__));
4551341825Sdim  ((struct __storeu_ps*)__P)->__v = __A;
4552277325Sdim}
4553277325Sdim
4554341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512
4555288943Sdim_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4556277325Sdim{
4557288943Sdim  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4558277325Sdim}
4559277325Sdim
4560341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512
4561277325Sdim_mm512_store_pd(void *__P, __m512d __A)
4562277325Sdim{
4563277325Sdim  *(__m512d*)__P = __A;
4564277325Sdim}
4565277325Sdim
4566341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512
4567288943Sdim_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4568288943Sdim{
4569288943Sdim  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4570288943Sdim                                   (__mmask16) __U);
4571288943Sdim}
4572288943Sdim
4573341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512
4574288943Sdim_mm512_store_ps(void *__P, __m512 __A)
4575288943Sdim{
4576288943Sdim  *(__m512*)__P = __A;
4577288943Sdim}
4578288943Sdim
4579341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512
4580309124Sdim_mm512_store_si512 (void *__P, __m512i __A)
4581309124Sdim{
4582309124Sdim  *(__m512i *) __P = __A;
4583309124Sdim}
4584309124Sdim
4585341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512
4586309124Sdim_mm512_store_epi32 (void *__P, __m512i __A)
4587309124Sdim{
4588309124Sdim  *(__m512i *) __P = __A;
4589309124Sdim}
4590309124Sdim
4591341825Sdimstatic __inline void __DEFAULT_FN_ATTRS512
4592309124Sdim_mm512_store_epi64 (void *__P, __m512i __A)
4593309124Sdim{
4594309124Sdim  *(__m512i *) __P = __A;
4595309124Sdim}
4596309124Sdim
4597277325Sdim/* Mask ops */
4598277325Sdim
4599344779Sdimstatic __inline __mmask16 __DEFAULT_FN_ATTRS
4600277325Sdim_mm512_knot(__mmask16 __M)
4601277325Sdim{
4602277325Sdim  return __builtin_ia32_knothi(__M);
4603277325Sdim}
4604277325Sdim
4605277325Sdim/* Integer compare */
4606277325Sdim
/* Named shorthands for _mm512_cmp_epi32_mask with a fixed _MM_CMPINT_*
   predicate.  A and B are the vectors being compared. */
#define _mm512_cmpeq_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpneq_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_cmplt_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_cmple_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_cmpgt_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_cmpge_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)

/* Same predicates routed through _mm512_mask_cmp_epi32_mask, which takes an
   additional mask k as its first argument. */
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
4631277325Sdim
/* Named shorthands for _mm512_cmp_epu32_mask with a fixed _MM_CMPINT_*
   predicate.  A and B are the vectors being compared. */
#define _mm512_cmpeq_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpneq_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_cmplt_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_cmple_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_cmpgt_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_cmpge_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)

/* Same predicates routed through _mm512_mask_cmp_epu32_mask, which takes an
   additional mask k as its first argument. */
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
4656277325Sdim
/* Named shorthands for _mm512_cmp_epi64_mask with a fixed _MM_CMPINT_*
   predicate.  A and B are the vectors being compared. */
#define _mm512_cmpeq_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpneq_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_cmplt_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_cmple_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_cmpgt_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_cmpge_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)

/* Same predicates routed through _mm512_mask_cmp_epi64_mask, which takes an
   additional mask k as its first argument. */
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
4681288943Sdim
/* Named shorthands for _mm512_cmp_epu64_mask with a fixed _MM_CMPINT_*
   predicate.  A and B are the vectors being compared. */
#define _mm512_cmpeq_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpneq_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_cmplt_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_cmple_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_cmpgt_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_cmpge_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)

/* Same predicates routed through _mm512_mask_cmp_epu64_mask, which takes an
   additional mask k as its first argument. */
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
4706288943Sdim
4707341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4708314564Sdim_mm512_cvtepi8_epi32(__m128i __A)
4709309124Sdim{
4710314564Sdim  /* This function always performs a signed extension, but __v16qi is a char
4711314564Sdim     which may be signed or unsigned, so use __v16qs. */
4712314564Sdim  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4713309124Sdim}
4714309124Sdim
4715341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4716314564Sdim_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4717309124Sdim{
4718314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4719314564Sdim                                             (__v16si)_mm512_cvtepi8_epi32(__A),
4720314564Sdim                                             (__v16si)__W);
4721309124Sdim}
4722309124Sdim
4723341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4724314564Sdim_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
4725309124Sdim{
4726314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4727314564Sdim                                             (__v16si)_mm512_cvtepi8_epi32(__A),
4728314564Sdim                                             (__v16si)_mm512_setzero_si512());
4729309124Sdim}
4730309124Sdim
4731341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4732314564Sdim_mm512_cvtepi8_epi64(__m128i __A)
4733309124Sdim{
4734314564Sdim  /* This function always performs a signed extension, but __v16qi is a char
4735314564Sdim     which may be signed or unsigned, so use __v16qs. */
4736314564Sdim  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4737309124Sdim}
4738309124Sdim
4739341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4740314564Sdim_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4741309124Sdim{
4742314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4743314564Sdim                                             (__v8di)_mm512_cvtepi8_epi64(__A),
4744314564Sdim                                             (__v8di)__W);
4745309124Sdim}
4746309124Sdim
4747341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4748314564Sdim_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4749309124Sdim{
4750314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4751314564Sdim                                             (__v8di)_mm512_cvtepi8_epi64(__A),
4752314564Sdim                                             (__v8di)_mm512_setzero_si512 ());
4753309124Sdim}
4754309124Sdim
4755341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4756314564Sdim_mm512_cvtepi32_epi64(__m256i __X)
4757309124Sdim{
4758314564Sdim  return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4759309124Sdim}
4760309124Sdim
4761341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4762314564Sdim_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4763309124Sdim{
4764314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4765314564Sdim                                             (__v8di)_mm512_cvtepi32_epi64(__X),
4766314564Sdim                                             (__v8di)__W);
4767309124Sdim}
4768309124Sdim
4769341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4770314564Sdim_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
4771309124Sdim{
4772314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4773314564Sdim                                             (__v8di)_mm512_cvtepi32_epi64(__X),
4774314564Sdim                                             (__v8di)_mm512_setzero_si512());
4775309124Sdim}
4776309124Sdim
4777341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4778314564Sdim_mm512_cvtepi16_epi32(__m256i __A)
4779309124Sdim{
4780314564Sdim  return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4781309124Sdim}
4782309124Sdim
4783341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4784314564Sdim_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4785309124Sdim{
4786314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4787314564Sdim                                            (__v16si)_mm512_cvtepi16_epi32(__A),
4788314564Sdim                                            (__v16si)__W);
4789309124Sdim}
4790309124Sdim
4791341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4792314564Sdim_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
4793309124Sdim{
4794314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4795314564Sdim                                            (__v16si)_mm512_cvtepi16_epi32(__A),
4796314564Sdim                                            (__v16si)_mm512_setzero_si512 ());
4797309124Sdim}
4798309124Sdim
4799341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4800314564Sdim_mm512_cvtepi16_epi64(__m128i __A)
4801309124Sdim{
4802314564Sdim  return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4803309124Sdim}
4804309124Sdim
4805341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4806314564Sdim_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4807309124Sdim{
4808314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4809314564Sdim                                             (__v8di)_mm512_cvtepi16_epi64(__A),
4810314564Sdim                                             (__v8di)__W);
4811309124Sdim}
4812309124Sdim
4813341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4814314564Sdim_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4815309124Sdim{
4816314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4817314564Sdim                                             (__v8di)_mm512_cvtepi16_epi64(__A),
4818314564Sdim                                             (__v8di)_mm512_setzero_si512());
4819309124Sdim}
4820309124Sdim
4821341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4822314564Sdim_mm512_cvtepu8_epi32(__m128i __A)
4823309124Sdim{
4824314564Sdim  return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4825309124Sdim}
4826309124Sdim
4827341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4828314564Sdim_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4829309124Sdim{
4830314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4831314564Sdim                                             (__v16si)_mm512_cvtepu8_epi32(__A),
4832314564Sdim                                             (__v16si)__W);
4833309124Sdim}
4834309124Sdim
4835341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4836314564Sdim_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
4837309124Sdim{
4838314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4839314564Sdim                                             (__v16si)_mm512_cvtepu8_epi32(__A),
4840314564Sdim                                             (__v16si)_mm512_setzero_si512());
4841309124Sdim}
4842309124Sdim
4843341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4844314564Sdim_mm512_cvtepu8_epi64(__m128i __A)
4845309124Sdim{
4846314564Sdim  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4847309124Sdim}
4848309124Sdim
4849341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4850314564Sdim_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4851309124Sdim{
4852314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4853314564Sdim                                             (__v8di)_mm512_cvtepu8_epi64(__A),
4854314564Sdim                                             (__v8di)__W);
4855309124Sdim}
4856309124Sdim
4857341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4858314564Sdim_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
4859309124Sdim{
4860314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4861314564Sdim                                             (__v8di)_mm512_cvtepu8_epi64(__A),
4862314564Sdim                                             (__v8di)_mm512_setzero_si512());
4863309124Sdim}
4864309124Sdim
4865341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4866314564Sdim_mm512_cvtepu32_epi64(__m256i __X)
4867309124Sdim{
4868314564Sdim  return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4869309124Sdim}
4870309124Sdim
4871341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4872314564Sdim_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4873309124Sdim{
4874314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4875314564Sdim                                             (__v8di)_mm512_cvtepu32_epi64(__X),
4876314564Sdim                                             (__v8di)__W);
4877309124Sdim}
4878309124Sdim
4879341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4880314564Sdim_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
4881309124Sdim{
4882314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4883314564Sdim                                             (__v8di)_mm512_cvtepu32_epi64(__X),
4884314564Sdim                                             (__v8di)_mm512_setzero_si512());
4885309124Sdim}
4886309124Sdim
4887341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4888314564Sdim_mm512_cvtepu16_epi32(__m256i __A)
4889309124Sdim{
4890314564Sdim  return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4891309124Sdim}
4892309124Sdim
4893341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4894314564Sdim_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4895309124Sdim{
4896314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4897314564Sdim                                            (__v16si)_mm512_cvtepu16_epi32(__A),
4898314564Sdim                                            (__v16si)__W);
4899309124Sdim}
4900309124Sdim
4901341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4902314564Sdim_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
4903309124Sdim{
4904314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4905314564Sdim                                            (__v16si)_mm512_cvtepu16_epi32(__A),
4906314564Sdim                                            (__v16si)_mm512_setzero_si512());
4907309124Sdim}
4908309124Sdim
4909341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4910314564Sdim_mm512_cvtepu16_epi64(__m128i __A)
4911309124Sdim{
4912314564Sdim  return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4913309124Sdim}
4914309124Sdim
4915341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4916314564Sdim_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4917309124Sdim{
4918314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4919314564Sdim                                             (__v8di)_mm512_cvtepu16_epi64(__A),
4920314564Sdim                                             (__v8di)__W);
4921309124Sdim}
4922309124Sdim
4923341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4924314564Sdim_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4925309124Sdim{
4926314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4927314564Sdim                                             (__v8di)_mm512_cvtepu16_epi64(__A),
4928314564Sdim                                             (__v8di)_mm512_setzero_si512());
4929309124Sdim}
4930309124Sdim
4931341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4932309124Sdim_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4933309124Sdim{
4934341825Sdim  return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
4935309124Sdim}
4936309124Sdim
4937341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4938309124Sdim_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4939309124Sdim{
4940341825Sdim  return (__m512i)__builtin_ia32_selectd_512(__U,
4941341825Sdim                                           (__v16si)_mm512_rorv_epi32(__A, __B),
4942341825Sdim                                           (__v16si)__W);
4943309124Sdim}
4944309124Sdim
4945341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4946309124Sdim_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4947309124Sdim{
4948341825Sdim  return (__m512i)__builtin_ia32_selectd_512(__U,
4949341825Sdim                                           (__v16si)_mm512_rorv_epi32(__A, __B),
4950341825Sdim                                           (__v16si)_mm512_setzero_si512());
4951309124Sdim}
4952309124Sdim
4953341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4954309124Sdim_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4955309124Sdim{
4956341825Sdim  return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
4957309124Sdim}
4958309124Sdim
4959341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4960309124Sdim_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4961309124Sdim{
4962341825Sdim  return (__m512i)__builtin_ia32_selectq_512(__U,
4963341825Sdim                                            (__v8di)_mm512_rorv_epi64(__A, __B),
4964341825Sdim                                            (__v8di)__W);
4965309124Sdim}
4966309124Sdim
4967341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
4968309124Sdim_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4969309124Sdim{
4970341825Sdim  return (__m512i)__builtin_ia32_selectq_512(__U,
4971341825Sdim                                            (__v8di)_mm512_rorv_epi64(__A, __B),
4972341825Sdim                                            (__v8di)_mm512_setzero_si512());
4973309124Sdim}
4974309124Sdim
4975309124Sdim
4976309124Sdim
/* Compares the 32-bit elements of a and b (viewed as __v16si) under
 * predicate p via __builtin_ia32_cmpd512_mask with an all-ones input mask;
 * yields a __mmask16.  The whole expansion is parenthesized so the cast
 * cannot be split from the call by a neighbouring postfix operator. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1))
4981288943Sdim
/* Compares the 32-bit elements of a and b under predicate p via the
 * unsigned-compare builtin __builtin_ia32_ucmpd512_mask with an all-ones
 * input mask; yields a __mmask16.  The whole expansion is parenthesized so
 * the cast cannot be split from the call by a neighbouring postfix
 * operator. */
#define _mm512_cmp_epu32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)-1))
4986288943Sdim
/* Compares the 64-bit elements of a and b (viewed as __v8di) under
 * predicate p via __builtin_ia32_cmpq512_mask with an all-ones input mask;
 * yields a __mmask8.  The whole expansion is parenthesized so the cast
 * cannot be split from the call by a neighbouring postfix operator. */
#define _mm512_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1))
4991288943Sdim
/* Compares the 64-bit elements of a and b under predicate p via the
 * unsigned-compare builtin __builtin_ia32_ucmpq512_mask with an all-ones
 * input mask; yields a __mmask8.  The whole expansion is parenthesized so
 * the cast cannot be split from the call by a neighbouring postfix
 * operator. */
#define _mm512_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)-1))
4996288943Sdim
/* Masked form of the 32-bit compare: same call as _mm512_cmp_epi32_mask but
 * with m, cast to __mmask16, passed as the builtin's input mask.  The whole
 * expansion is parenthesized so the cast cannot be split from the call by a
 * neighbouring postfix operator. */
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)))
5001288943Sdim
/* Masked form of the unsigned 32-bit compare: same call as
 * _mm512_cmp_epu32_mask but with m, cast to __mmask16, passed as the
 * builtin's input mask.  The whole expansion is parenthesized so the cast
 * cannot be split from the call by a neighbouring postfix operator. */
#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)(m)))
5006288943Sdim
/* Masked form of the 64-bit compare: same call as _mm512_cmp_epi64_mask but
 * with m, cast to __mmask8, passed as the builtin's input mask.  The whole
 * expansion is parenthesized so the cast cannot be split from the call by a
 * neighbouring postfix operator. */
#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)))
5011288943Sdim
/* Masked form of the unsigned 64-bit compare: same call as
 * _mm512_cmp_epu64_mask but with m, cast to __mmask8, passed as the
 * builtin's input mask.  The whole expansion is parenthesized so the cast
 * cannot be split from the call by a neighbouring postfix operator. */
#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)(m)))
5016288943Sdim
/* Rotate-left of the 32-bit elements of a by the count b, forwarded to
 * __builtin_ia32_prold512 with b cast to int.  The whole expansion is
 * parenthesized so that e.g. a trailing subscript applies to the __m512i
 * result rather than to the builtin call inside the cast. */
#define _mm512_rol_epi32(a, b) \
  ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))
5019309124Sdim
/* Merge-masked _mm512_rol_epi32: a rotated element is used where the
 * matching bit of U is set; otherwise the element of W is kept.  The whole
 * expansion is parenthesized so the cast cannot be split from the call by a
 * neighbouring postfix operator. */
#define _mm512_mask_rol_epi32(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)(__m512i)(W)))
5024309124Sdim
/* Zero-masked _mm512_rol_epi32: a rotated element is used where the
 * matching bit of U is set; otherwise the element comes from
 * _mm512_setzero_si512().  The whole expansion is parenthesized so the cast
 * cannot be split from the call by a neighbouring postfix operator. */
#define _mm512_maskz_rol_epi32(U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)_mm512_setzero_si512()))
5029309124Sdim
/* Rotate-left of the 64-bit elements of a by the count b, forwarded to
 * __builtin_ia32_prolq512 with b cast to int.  The whole expansion is
 * parenthesized so that e.g. a trailing subscript applies to the __m512i
 * result rather than to the builtin call inside the cast. */
#define _mm512_rol_epi64(a, b) \
  ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))
5032309124Sdim
/* Merge-masked _mm512_rol_epi64: a rotated element is used where the
 * matching bit of U is set; otherwise the element of W is kept.  The whole
 * expansion is parenthesized so the cast cannot be split from the call by a
 * neighbouring postfix operator. */
#define _mm512_mask_rol_epi64(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)(__m512i)(W)))
5037309124Sdim
/* Zero-masked _mm512_rol_epi64: a rotated element is used where the
 * matching bit of U is set; otherwise the element comes from
 * _mm512_setzero_si512().  The whole expansion is parenthesized so the cast
 * cannot be split from the call by a neighbouring postfix operator. */
#define _mm512_maskz_rol_epi64(U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)_mm512_setzero_si512()))
5042341825Sdim
5043341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5044309124Sdim_mm512_rolv_epi32 (__m512i __A, __m512i __B)
5045309124Sdim{
5046341825Sdim  return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
5047309124Sdim}
5048309124Sdim
5049341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5050309124Sdim_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5051309124Sdim{
5052341825Sdim  return (__m512i)__builtin_ia32_selectd_512(__U,
5053341825Sdim                                           (__v16si)_mm512_rolv_epi32(__A, __B),
5054341825Sdim                                           (__v16si)__W);
5055309124Sdim}
5056309124Sdim
5057341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5058309124Sdim_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5059309124Sdim{
5060341825Sdim  return (__m512i)__builtin_ia32_selectd_512(__U,
5061341825Sdim                                           (__v16si)_mm512_rolv_epi32(__A, __B),
5062341825Sdim                                           (__v16si)_mm512_setzero_si512());
5063309124Sdim}
5064309124Sdim
5065341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5066309124Sdim_mm512_rolv_epi64 (__m512i __A, __m512i __B)
5067309124Sdim{
5068341825Sdim  return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
5069309124Sdim}
5070309124Sdim
5071341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5072309124Sdim_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5073309124Sdim{
5074341825Sdim  return (__m512i)__builtin_ia32_selectq_512(__U,
5075341825Sdim                                            (__v8di)_mm512_rolv_epi64(__A, __B),
5076341825Sdim                                            (__v8di)__W);
5077309124Sdim}
5078309124Sdim
5079341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5080309124Sdim_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5081309124Sdim{
5082341825Sdim  return (__m512i)__builtin_ia32_selectq_512(__U,
5083341825Sdim                                            (__v8di)_mm512_rolv_epi64(__A, __B),
5084341825Sdim                                            (__v8di)_mm512_setzero_si512());
5085309124Sdim}
5086309124Sdim
/* Rotate-right of the 32-bit elements of A by the count B, forwarded to
 * __builtin_ia32_prord512 with B cast to int.  The whole expansion is
 * parenthesized so that e.g. a trailing subscript applies to the __m512i
 * result rather than to the builtin call inside the cast. */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))
5089309124Sdim
/* Merge-masked _mm512_ror_epi32: a rotated element is used where the
 * matching bit of U is set; otherwise the element of W is kept.  The whole
 * expansion is parenthesized so the cast cannot be split from the call by a
 * neighbouring postfix operator. */
#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)(__m512i)(W)))
5094309124Sdim
/* Zero-masked _mm512_ror_epi32: a rotated element is used where the
 * matching bit of U is set; otherwise the element comes from
 * _mm512_setzero_si512().  The whole expansion is parenthesized so the cast
 * cannot be split from the call by a neighbouring postfix operator. */
#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)_mm512_setzero_si512()))
5099309124Sdim
/* Rotate-right of the 64-bit elements of A by the count B, forwarded to
 * __builtin_ia32_prorq512 with B cast to int.  The whole expansion is
 * parenthesized so that e.g. a trailing subscript applies to the __m512i
 * result rather than to the builtin call inside the cast. */
#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))
5102309124Sdim
/* Merge-masked _mm512_ror_epi64: a rotated element is used where the
 * matching bit of U is set; otherwise the element of W is kept.  The whole
 * expansion is parenthesized so the cast cannot be split from the call by a
 * neighbouring postfix operator. */
#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)(__m512i)(W)))
5107309124Sdim
/* Zero-masked _mm512_ror_epi64: a rotated element is used where the
 * matching bit of U is set; otherwise the element comes from
 * _mm512_setzero_si512().  The whole expansion is parenthesized so the cast
 * cannot be split from the call by a neighbouring postfix operator. */
#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)_mm512_setzero_si512()))
5112309124Sdim
5113341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5114314564Sdim_mm512_slli_epi32(__m512i __A, int __B)
5115314564Sdim{
5116314564Sdim  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5117314564Sdim}
5118309124Sdim
5119341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5120314564Sdim_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5121314564Sdim{
5122314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5123314564Sdim                                         (__v16si)_mm512_slli_epi32(__A, __B),
5124314564Sdim                                         (__v16si)__W);
5125314564Sdim}
5126309124Sdim
5127341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5128314564Sdim_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
5129314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5130314564Sdim                                         (__v16si)_mm512_slli_epi32(__A, __B),
5131314564Sdim                                         (__v16si)_mm512_setzero_si512());
5132314564Sdim}
5133309124Sdim
5134341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5135314564Sdim_mm512_slli_epi64(__m512i __A, int __B)
5136314564Sdim{
5137314564Sdim  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5138314564Sdim}
5139309124Sdim
5140341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5141314564Sdim_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5142314564Sdim{
5143314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5144314564Sdim                                          (__v8di)_mm512_slli_epi64(__A, __B),
5145314564Sdim                                          (__v8di)__W);
5146314564Sdim}
5147309124Sdim
5148341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5149314564Sdim_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
5150314564Sdim{
5151314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5152314564Sdim                                          (__v8di)_mm512_slli_epi64(__A, __B),
5153314564Sdim                                          (__v8di)_mm512_setzero_si512());
5154314564Sdim}
5155309124Sdim
5156341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5157314564Sdim_mm512_srli_epi32(__m512i __A, int __B)
5158314564Sdim{
5159314564Sdim  return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5160314564Sdim}
5161309124Sdim
5162341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5163314564Sdim_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5164314564Sdim{
5165314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5166314564Sdim                                         (__v16si)_mm512_srli_epi32(__A, __B),
5167314564Sdim                                         (__v16si)__W);
5168314564Sdim}
5169309124Sdim
5170341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5171314564Sdim_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
5172314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5173314564Sdim                                         (__v16si)_mm512_srli_epi32(__A, __B),
5174314564Sdim                                         (__v16si)_mm512_setzero_si512());
5175314564Sdim}
5176309124Sdim
5177341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5178314564Sdim_mm512_srli_epi64(__m512i __A, int __B)
5179314564Sdim{
5180314564Sdim  return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5181314564Sdim}
5182309124Sdim
5183341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5184314564Sdim_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5185314564Sdim{
5186314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5187314564Sdim                                          (__v8di)_mm512_srli_epi64(__A, __B),
5188314564Sdim                                          (__v8di)__W);
5189314564Sdim}
5190309124Sdim
5191341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5192314564Sdim_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
5193314564Sdim{
5194314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5195314564Sdim                                          (__v8di)_mm512_srli_epi64(__A, __B),
5196314564Sdim                                          (__v8di)_mm512_setzero_si512());
5197314564Sdim}
5198309124Sdim
5199341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5200309124Sdim_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5201309124Sdim{
5202309124Sdim  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5203309124Sdim              (__v16si) __W,
5204309124Sdim              (__mmask16) __U);
5205309124Sdim}
5206309124Sdim
5207341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5208309124Sdim_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
5209309124Sdim{
5210309124Sdim  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5211309124Sdim              (__v16si)
5212309124Sdim              _mm512_setzero_si512 (),
5213309124Sdim              (__mmask16) __U);
5214309124Sdim}
5215309124Sdim
5216341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
5217309124Sdim_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5218309124Sdim{
5219309124Sdim  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5220309124Sdim          (__mmask16) __U);
5221309124Sdim}
5222309124Sdim
5223341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5224309124Sdim_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5225309124Sdim{
5226309124Sdim  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5227309124Sdim                 (__v16si) __A,
5228309124Sdim                 (__v16si) __W);
5229309124Sdim}
5230309124Sdim
5231341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5232309124Sdim_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
5233309124Sdim{
5234309124Sdim  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5235309124Sdim                 (__v16si) __A,
5236309124Sdim                 (__v16si) _mm512_setzero_si512 ());
5237309124Sdim}
5238309124Sdim
5239341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5240309124Sdim_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5241309124Sdim{
5242309124Sdim  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5243309124Sdim                 (__v8di) __A,
5244309124Sdim                 (__v8di) __W);
5245309124Sdim}
5246309124Sdim
5247341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5248309124Sdim_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
5249309124Sdim{
5250309124Sdim  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5251309124Sdim                 (__v8di) __A,
5252309124Sdim                 (__v8di) _mm512_setzero_si512 ());
5253309124Sdim}
5254309124Sdim
5255341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5256309124Sdim_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5257309124Sdim{
5258309124Sdim  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5259309124Sdim              (__v8di) __W,
5260309124Sdim              (__mmask8) __U);
5261309124Sdim}
5262309124Sdim
5263341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5264309124Sdim_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5265309124Sdim{
5266309124Sdim  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5267309124Sdim              (__v8di)
5268309124Sdim              _mm512_setzero_si512 (),
5269309124Sdim              (__mmask8) __U);
5270309124Sdim}
5271309124Sdim
5272341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
5273309124Sdim_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5274309124Sdim{
5275309124Sdim  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5276309124Sdim          (__mmask8) __U);
5277309124Sdim}
5278309124Sdim
5279341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
5280309124Sdim_mm512_movedup_pd (__m512d __A)
5281309124Sdim{
5282309124Sdim  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5283309124Sdim                                          0, 0, 2, 2, 4, 4, 6, 6);
5284309124Sdim}
5285309124Sdim
5286341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
5287309124Sdim_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5288309124Sdim{
5289309124Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5290309124Sdim                                              (__v8df)_mm512_movedup_pd(__A),
5291309124Sdim                                              (__v8df)__W);
5292309124Sdim}
5293309124Sdim
5294341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
5295309124Sdim_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
5296309124Sdim{
5297309124Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5298309124Sdim                                              (__v8df)_mm512_movedup_pd(__A),
5299309124Sdim                                              (__v8df)_mm512_setzero_pd());
5300309124Sdim}
5301309124Sdim
/* Forwards A, B (as __v8df), C (as __v8di) and imm to
 * __builtin_ia32_fixupimmpd512_mask with an all-ones mask and R as the
 * final int argument.  The whole expansion is parenthesized so the cast
 * cannot be split from the call by a neighbouring postfix operator. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, (int)(R)))
5307309124Sdim
/* Forwards A, B (as __v8df), C (as __v8di) and imm to
 * __builtin_ia32_fixupimmpd512_mask with U as the mask and R as the final
 * int argument.  The whole expansion is parenthesized so the cast cannot be
 * split from the call by a neighbouring postfix operator. */
#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), (int)(R)))
5313309124Sdim
/* Forwards A, B (as __v8df), C (as __v8di) and imm to
 * __builtin_ia32_fixupimmpd512_mask with an all-ones mask and
 * _MM_FROUND_CUR_DIRECTION as the final argument.  The whole expansion is
 * parenthesized so the cast cannot be split from the call by a neighbouring
 * postfix operator. */
#define _mm512_fixupimm_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION))
5320309124Sdim
/* Forwards A, B (as __v8df), C (as __v8di) and imm to
 * __builtin_ia32_fixupimmpd512_mask with U as the mask and
 * _MM_FROUND_CUR_DIRECTION as the final argument.  The whole expansion is
 * parenthesized so the cast cannot be split from the call by a neighbouring
 * postfix operator. */
#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
5327309124Sdim
/* Forwards A, B (as __v8df), C (as __v8di) and imm to the zero-masking
 * builtin __builtin_ia32_fixupimmpd512_maskz with U as the mask and R as
 * the final int argument.  The whole expansion is parenthesized so the cast
 * cannot be split from the call by a neighbouring postfix operator. */
#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               (int)(R)))
5334309124Sdim
/* Forwards A, B (as __v8df), C (as __v8di) and imm to the zero-masking
 * builtin __builtin_ia32_fixupimmpd512_maskz with U as the mask and
 * _MM_FROUND_CUR_DIRECTION as the final argument.  The whole expansion is
 * parenthesized so the cast cannot be split from the call by a neighbouring
 * postfix operator. */
#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))
5341309124Sdim
/* Forwards A, B (as __v16sf), C (as __v16si) and imm to
 * __builtin_ia32_fixupimmps512_mask with an all-ones mask and R as the
 * final int argument.  The whole expansion is parenthesized so the cast
 * cannot be split from the call by a neighbouring postfix operator. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, (int)(R)))
5347309124Sdim
/* Forwards A, B (as __v16sf), C (as __v16si) and imm to
 * __builtin_ia32_fixupimmps512_mask with U as the mask and R as the final
 * int argument.  The whole expansion is parenthesized so the cast cannot be
 * split from the call by a neighbouring postfix operator. */
#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), (int)(R)))
5353309124Sdim
/* Forwards A, B (as __v16sf), C (as __v16si) and imm to
 * __builtin_ia32_fixupimmps512_mask with an all-ones mask and
 * _MM_FROUND_CUR_DIRECTION as the final argument.  The whole expansion is
 * parenthesized so the cast cannot be split from the call by a neighbouring
 * postfix operator. */
#define _mm512_fixupimm_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, \
                                             _MM_FROUND_CUR_DIRECTION))
5360309124Sdim
/* Forwards A, B (as __v16sf), C (as __v16si) and imm to
 * __builtin_ia32_fixupimmps512_mask with U as the mask and
 * _MM_FROUND_CUR_DIRECTION as the final argument.  The whole expansion is
 * parenthesized so the cast cannot be split from the call by a neighbouring
 * postfix operator. */
#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION))
5367309124Sdim
/* Forwards A, B (as __v16sf), C (as __v16si) and imm to the zero-masking
 * builtin __builtin_ia32_fixupimmps512_maskz with U as the mask and R as
 * the final int argument.  The whole expansion is parenthesized so the cast
 * cannot be split from the call by a neighbouring postfix operator. */
#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              (int)(R)))
5374309124Sdim
/* Forwards A, B (as __v16sf), C (as __v16si) and imm to the zero-masking
 * builtin __builtin_ia32_fixupimmps512_maskz with U as the mask and
 * _MM_FROUND_CUR_DIRECTION as the final argument.  The whole expansion is
 * parenthesized so the cast cannot be split from the call by a neighbouring
 * postfix operator. */
#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
5381309124Sdim
/* Forwards A, B (as __v2df), C (as __v2di), imm and R to
 * __builtin_ia32_fixupimmsd_mask with an all-ones mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, (int)(R)))
5387309124Sdim
/* Forwards A, B (as __v2df), C (as __v2di), imm, the mask U and R to
 * __builtin_ia32_fixupimmsd_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))
5393309124Sdim
/* Like _mm_fixupimm_round_sd (all-ones mask), with
 * _MM_FROUND_CUR_DIRECTION passed in place of a caller-supplied R.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_fixupimm_sd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))
5400309124Sdim
/* Like _mm_mask_fixupimm_round_sd, with _MM_FROUND_CUR_DIRECTION passed in
 * place of a caller-supplied R.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
5407309124Sdim
/* Forwards A, B (as __v2df), C (as __v2di), imm, the mask U and R to
 * __builtin_ia32_fixupimmsd_maskz.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), (int)(R)))
5413309124Sdim
/* Like _mm_maskz_fixupimm_round_sd, with _MM_FROUND_CUR_DIRECTION passed in
 * place of a caller-supplied R.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
5420309124Sdim
/* Forwards A, B (as __v4sf), C (as __v4si), imm and R to
 * __builtin_ia32_fixupimmss_mask with an all-ones mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)))
5426309124Sdim
/* Forwards A, B (as __v4sf), C (as __v4si), imm, the mask U and R to
 * __builtin_ia32_fixupimmss_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)))
5432309124Sdim
/* Like _mm_fixupimm_round_ss (all-ones mask), with
 * _MM_FROUND_CUR_DIRECTION passed in place of a caller-supplied R.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_fixupimm_ss(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION))
5439309124Sdim
/* Like _mm_mask_fixupimm_round_ss, with _MM_FROUND_CUR_DIRECTION passed in
 * place of a caller-supplied R.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION))
5446309124Sdim
/* Forwards A, B (as __v4sf), C (as __v4si), imm, the mask U and R to
 * __builtin_ia32_fixupimmss_maskz.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))
5452309124Sdim
/* Like _mm_maskz_fixupimm_round_ss, with _MM_FROUND_CUR_DIRECTION passed in
 * place of a caller-supplied R.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
5459309124Sdim
/* Forwards A and B (as __v2df), a zero vector from _mm_setzero_pd(), an
 * all-ones mask and R to __builtin_ia32_getexpsd128_round_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)-1, (int)(R)))
5465309124Sdim
5466309124Sdim
5467341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
5468309124Sdim_mm_getexp_sd (__m128d __A, __m128d __B)
5469309124Sdim{
5470309124Sdim  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5471309124Sdim                 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5472309124Sdim}
5473309124Sdim
5474341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
5475309124Sdim_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5476309124Sdim{
5477309124Sdim return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5478309124Sdim          (__v2df) __B,
5479309124Sdim          (__v2df) __W,
5480309124Sdim          (__mmask8) __U,
5481309124Sdim          _MM_FROUND_CUR_DIRECTION);
5482309124Sdim}
5483309124Sdim
/* Forwards A and B (as __v2df), W as the third operand, the mask U and R to
 * __builtin_ia32_getexpsd128_round_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)(__m128d)(W), \
                                                  (__mmask8)(U), (int)(R)))
5489309124Sdim
5490341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
5491309124Sdim_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5492309124Sdim{
5493309124Sdim return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5494309124Sdim          (__v2df) __B,
5495309124Sdim          (__v2df) _mm_setzero_pd (),
5496309124Sdim          (__mmask8) __U,
5497309124Sdim          _MM_FROUND_CUR_DIRECTION);
5498309124Sdim}
5499309124Sdim
/* Forwards A and B (as __v2df), a zero vector from _mm_setzero_pd(), the
 * mask U and R to __builtin_ia32_getexpsd128_round_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)(U), (int)(R)))
5505309124Sdim
/* Forwards A and B (as __v4sf), a zero vector from _mm_setzero_ps(), an
 * all-ones mask and R to __builtin_ia32_getexpss128_round_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_getexp_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)-1, (int)(R)))
5511309124Sdim
5512341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
5513309124Sdim_mm_getexp_ss (__m128 __A, __m128 __B)
5514309124Sdim{
5515309124Sdim  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5516309124Sdim                (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5517309124Sdim}
5518309124Sdim
5519341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
5520309124Sdim_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5521309124Sdim{
5522309124Sdim return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5523309124Sdim          (__v4sf) __B,
5524309124Sdim          (__v4sf) __W,
5525309124Sdim          (__mmask8) __U,
5526309124Sdim          _MM_FROUND_CUR_DIRECTION);
5527309124Sdim}
5528309124Sdim
/* Forwards A and B (as __v4sf), W as the third operand, the mask U and R to
 * __builtin_ia32_getexpss128_round_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)(__m128)(W), \
                                                 (__mmask8)(U), (int)(R)))
5534309124Sdim
5535341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
5536309124Sdim_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5537309124Sdim{
5538309124Sdim return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5539309124Sdim          (__v4sf) __B,
5540341825Sdim          (__v4sf) _mm_setzero_ps (),
5541309124Sdim          (__mmask8) __U,
5542309124Sdim          _MM_FROUND_CUR_DIRECTION);
5543309124Sdim}
5544309124Sdim
/* Forwards A and B (as __v4sf), a zero vector from _mm_setzero_ps(), the
 * mask U and R to __builtin_ia32_getexpss128_round_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)(U), (int)(R)))
5550309124Sdim
/* Forwards A and B (as __v2df), the combined immediate ((D) << 2) | (C), a
 * zero vector, an all-ones mask and R to
 * __builtin_ia32_getmantsd_round_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))
5557309124Sdim
/* Like _mm_getmant_round_sd (zero vector, all-ones mask), with
 * _MM_FROUND_CUR_DIRECTION passed in place of a caller-supplied R.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_getmant_sd(A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, \
                                                _MM_FROUND_CUR_DIRECTION))
5565309124Sdim
/* Forwards A and B (as __v2df), the combined immediate ((D) << 2) | (C), W,
 * the mask U and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_getmantsd_round_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))
5573309124Sdim
/* Forwards A and B (as __v2df), the combined immediate ((D) << 2) | (C), W,
 * the mask U and R to __builtin_ia32_getmantsd_round_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(R)))
5580309124Sdim
/* Forwards A and B (as __v2df), the combined immediate ((D) << 2) | (C), a
 * zero vector, the mask U and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_getmantsd_round_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))
5588309124Sdim
/* Forwards A and B (as __v2df), the combined immediate ((D) << 2) | (C), a
 * zero vector, the mask U and R to __builtin_ia32_getmantsd_round_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(R)))
5595309124Sdim
/* Forwards A and B (as __v4sf), the combined immediate ((D) << 2) | (C), a
 * zero vector, an all-ones mask and R to
 * __builtin_ia32_getmantss_round_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))
5602309124Sdim
/* Like _mm_getmant_round_ss (zero vector, all-ones mask), with
 * _MM_FROUND_CUR_DIRECTION passed in place of a caller-supplied R.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_getmant_ss(A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION))
5610309124Sdim
/* Forwards A and B (as __v4sf), the combined immediate ((D) << 2) | (C), W,
 * the mask U and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_getmantss_round_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))
5618309124Sdim
/* Forwards A and B (as __v4sf), the combined immediate ((D) << 2) | (C), W,
 * the mask U and R to __builtin_ia32_getmantss_round_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)))
5625309124Sdim
/* Forwards A and B (as __v4sf), the combined immediate ((D) << 2) | (C), a
 * zero vector, the mask U and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_getmantss_round_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))
5633309124Sdim
/* Forwards A and B (as __v4sf), the combined immediate ((D) << 2) | (C), a
 * zero vector, the mask U and R to __builtin_ia32_getmantss_round_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
5640309124Sdim
5641344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS
5642309124Sdim_mm512_kmov (__mmask16 __A)
5643309124Sdim{
5644309124Sdim  return  __A;
5645309124Sdim}
5646309124Sdim
/* Forwards A and B (as __v2df) plus the integers P and R to
 * __builtin_ia32_vcomisd and yields the result as int.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_comi_round_sd(A, B, P, R) \
  ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                               (int)(P), (int)(R)))
5650309124Sdim
/* Forwards A and B (as __v4sf) plus the integers P and R to
 * __builtin_ia32_vcomiss and yields the result as int.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm_comi_round_ss(A, B, P, R) \
  ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                               (int)(P), (int)(R)))
5654309124Sdim
/* Defined only when __x86_64__ is defined.  Forwards A (as __v2df) and R to
 * __builtin_ia32_vcvtsd2si64 and yields the result as long long.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#ifdef __x86_64__
#define _mm_cvt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
5659309124Sdim
5660341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5661314564Sdim_mm512_sll_epi32(__m512i __A, __m128i __B)
5662309124Sdim{
5663314564Sdim  return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5664309124Sdim}
5665309124Sdim
5666341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5667314564Sdim_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5668309124Sdim{
5669314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5670314564Sdim                                          (__v16si)_mm512_sll_epi32(__A, __B),
5671314564Sdim                                          (__v16si)__W);
5672309124Sdim}
5673309124Sdim
5674341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5675314564Sdim_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5676309124Sdim{
5677314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5678314564Sdim                                          (__v16si)_mm512_sll_epi32(__A, __B),
5679314564Sdim                                          (__v16si)_mm512_setzero_si512());
5680309124Sdim}
5681309124Sdim
5682341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5683314564Sdim_mm512_sll_epi64(__m512i __A, __m128i __B)
5684309124Sdim{
5685314564Sdim  return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5686309124Sdim}
5687309124Sdim
5688341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5689314564Sdim_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5690309124Sdim{
5691314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5692314564Sdim                                             (__v8di)_mm512_sll_epi64(__A, __B),
5693314564Sdim                                             (__v8di)__W);
5694309124Sdim}
5695309124Sdim
5696341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5697314564Sdim_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5698309124Sdim{
5699314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5700314564Sdim                                           (__v8di)_mm512_sll_epi64(__A, __B),
5701314564Sdim                                           (__v8di)_mm512_setzero_si512());
5702309124Sdim}
5703309124Sdim
5704341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5705314564Sdim_mm512_sllv_epi32(__m512i __X, __m512i __Y)
5706309124Sdim{
5707314564Sdim  return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5708309124Sdim}
5709309124Sdim
5710341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5711314564Sdim_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5712309124Sdim{
5713314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5714314564Sdim                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
5715314564Sdim                                           (__v16si)__W);
5716309124Sdim}
5717309124Sdim
5718341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5719314564Sdim_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5720309124Sdim{
5721314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5722314564Sdim                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
5723314564Sdim                                           (__v16si)_mm512_setzero_si512());
5724309124Sdim}
5725309124Sdim
5726341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5727314564Sdim_mm512_sllv_epi64(__m512i __X, __m512i __Y)
5728309124Sdim{
5729314564Sdim  return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5730309124Sdim}
5731309124Sdim
5732341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5733314564Sdim_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5734309124Sdim{
5735314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5736314564Sdim                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
5737314564Sdim                                            (__v8di)__W);
5738309124Sdim}
5739309124Sdim
5740341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5741314564Sdim_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5742309124Sdim{
5743314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5744314564Sdim                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
5745314564Sdim                                            (__v8di)_mm512_setzero_si512());
5746309124Sdim}
5747309124Sdim
5748341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5749314564Sdim_mm512_sra_epi32(__m512i __A, __m128i __B)
5750309124Sdim{
5751314564Sdim  return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5752309124Sdim}
5753309124Sdim
5754341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5755314564Sdim_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5756309124Sdim{
5757314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5758314564Sdim                                          (__v16si)_mm512_sra_epi32(__A, __B),
5759314564Sdim                                          (__v16si)__W);
5760309124Sdim}
5761309124Sdim
5762341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5763314564Sdim_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5764309124Sdim{
5765314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5766314564Sdim                                          (__v16si)_mm512_sra_epi32(__A, __B),
5767314564Sdim                                          (__v16si)_mm512_setzero_si512());
5768309124Sdim}
5769309124Sdim
5770341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5771314564Sdim_mm512_sra_epi64(__m512i __A, __m128i __B)
5772309124Sdim{
5773314564Sdim  return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5774309124Sdim}
5775309124Sdim
5776341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5777314564Sdim_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5778309124Sdim{
5779314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5780314564Sdim                                           (__v8di)_mm512_sra_epi64(__A, __B),
5781314564Sdim                                           (__v8di)__W);
5782309124Sdim}
5783309124Sdim
5784341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5785314564Sdim_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5786309124Sdim{
5787314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5788314564Sdim                                           (__v8di)_mm512_sra_epi64(__A, __B),
5789314564Sdim                                           (__v8di)_mm512_setzero_si512());
5790309124Sdim}
5791309124Sdim
5792341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5793314564Sdim_mm512_srav_epi32(__m512i __X, __m512i __Y)
5794309124Sdim{
5795314564Sdim  return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
5796309124Sdim}
5797309124Sdim
5798341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5799314564Sdim_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5800309124Sdim{
5801314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5802314564Sdim                                           (__v16si)_mm512_srav_epi32(__X, __Y),
5803314564Sdim                                           (__v16si)__W);
5804309124Sdim}
5805309124Sdim
5806341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5807314564Sdim_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5808309124Sdim{
5809314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5810314564Sdim                                           (__v16si)_mm512_srav_epi32(__X, __Y),
5811314564Sdim                                           (__v16si)_mm512_setzero_si512());
5812309124Sdim}
5813309124Sdim
5814341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5815314564Sdim_mm512_srav_epi64(__m512i __X, __m512i __Y)
5816309124Sdim{
5817314564Sdim  return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
5818309124Sdim}
5819309124Sdim
5820341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5821314564Sdim_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5822309124Sdim{
5823314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5824314564Sdim                                            (__v8di)_mm512_srav_epi64(__X, __Y),
5825314564Sdim                                            (__v8di)__W);
5826309124Sdim}
5827309124Sdim
5828341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5829314564Sdim_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5830309124Sdim{
5831314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5832314564Sdim                                            (__v8di)_mm512_srav_epi64(__X, __Y),
5833314564Sdim                                            (__v8di)_mm512_setzero_si512());
5834309124Sdim}
5835309124Sdim
5836341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5837314564Sdim_mm512_srl_epi32(__m512i __A, __m128i __B)
5838309124Sdim{
5839314564Sdim  return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5840309124Sdim}
5841309124Sdim
5842341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5843314564Sdim_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5844309124Sdim{
5845314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5846314564Sdim                                          (__v16si)_mm512_srl_epi32(__A, __B),
5847314564Sdim                                          (__v16si)__W);
5848309124Sdim}
5849309124Sdim
5850341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5851314564Sdim_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5852309124Sdim{
5853314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5854314564Sdim                                          (__v16si)_mm512_srl_epi32(__A, __B),
5855314564Sdim                                          (__v16si)_mm512_setzero_si512());
5856309124Sdim}
5857309124Sdim
5858341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5859314564Sdim_mm512_srl_epi64(__m512i __A, __m128i __B)
5860309124Sdim{
5861314564Sdim  return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5862309124Sdim}
5863309124Sdim
5864341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5865314564Sdim_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5866309124Sdim{
5867314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5868314564Sdim                                           (__v8di)_mm512_srl_epi64(__A, __B),
5869314564Sdim                                           (__v8di)__W);
5870309124Sdim}
5871309124Sdim
5872341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5873314564Sdim_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5874309124Sdim{
5875314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5876314564Sdim                                           (__v8di)_mm512_srl_epi64(__A, __B),
5877314564Sdim                                           (__v8di)_mm512_setzero_si512());
5878309124Sdim}
5879309124Sdim
5880341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5881314564Sdim_mm512_srlv_epi32(__m512i __X, __m512i __Y)
5882309124Sdim{
5883314564Sdim  return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
5884309124Sdim}
5885309124Sdim
5886341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5887314564Sdim_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5888309124Sdim{
5889314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5890314564Sdim                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
5891314564Sdim                                           (__v16si)__W);
5892309124Sdim}
5893309124Sdim
5894341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5895314564Sdim_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5896309124Sdim{
5897314564Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5898314564Sdim                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
5899314564Sdim                                           (__v16si)_mm512_setzero_si512());
5900309124Sdim}
5901309124Sdim
5902341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5903309124Sdim_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
5904309124Sdim{
5905314564Sdim  return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
5906309124Sdim}
5907309124Sdim
5908341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5909314564Sdim_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5910309124Sdim{
5911314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5912314564Sdim                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
5913314564Sdim                                            (__v8di)__W);
5914309124Sdim}
5915309124Sdim
5916341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
5917314564Sdim_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5918309124Sdim{
5919314564Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5920314564Sdim                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
5921314564Sdim                                            (__v8di)_mm512_setzero_si512());
5922309124Sdim}
5923309124Sdim
/* Forwards A, B, C (as __v16si) and imm to
 * __builtin_ia32_pternlogd512_mask with an all-ones mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1))
5929309124Sdim
/* Forwards A, B, C (as __v16si), imm and the mask U to
 * __builtin_ia32_pternlogd512_mask.
 * The expansion is wrapped in parentheses so it acts as one primary
 * expression at the use site. */
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U)))
5935309124Sdim
/* Zero-masking form of _mm512_ternarylogic_epi32: uses the `_maskz` builtin
 * with U as the 16-bit mask. Expansion is fully parenthesized so the cast
 * applies to the whole call even when followed by a postfix operator. */
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
                                              (__v16si)(__m512i)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U)))
5941309124Sdim
/* Bitwise three-operand logic on 64-bit lanes of A, B and C; imm is the
 * immediate selecting the boolean function. All 8 lanes are written (mask is
 * all ones). Expansion is fully parenthesized for safe composition. */
#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1))
5947309124Sdim
/* Masked form of _mm512_ternarylogic_epi64: U is forwarded as the 8-bit write
 * mask of the `_mask` builtin (per Intel's definition, unselected lanes keep
 * the value of A). Expansion is fully parenthesized for safe composition. */
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U)))
5953309124Sdim
/* Zero-masking form of _mm512_ternarylogic_epi64: uses the `_maskz` builtin
 * with U as the 8-bit mask. Expansion is fully parenthesized so the cast
 * applies to the whole call even when followed by a postfix operator. */
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
                                              (__v8di)(__m512i)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U)))
5959309124Sdim
#ifdef __x86_64__
/* Convert the low double of A to a signed 64-bit integer; R is forwarded as
 * the builtin's rounding-control immediate. 64-bit targets only. The
 * expansion is fully parenthesized so it behaves as a single expression. */
#define _mm_cvt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
5964309124Sdim
/* Convert the low double of A to a signed 32-bit integer; R is forwarded as
 * the builtin's rounding-control immediate. Fully parenthesized expansion. */
#define _mm_cvt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
5967309124Sdim
/* Alternate spelling of _mm_cvt_roundsd_si32 (identical expansion): low
 * double of A to signed 32-bit integer with rounding control R. Fully
 * parenthesized expansion. */
#define _mm_cvt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
5970309124Sdim
/* Convert the low double of A to an unsigned 32-bit integer; R is forwarded
 * as the builtin's rounding-control immediate. Fully parenthesized. */
#define _mm_cvt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5973309124Sdim
5974341825Sdimstatic __inline__ unsigned __DEFAULT_FN_ATTRS128
5975309124Sdim_mm_cvtsd_u32 (__m128d __A)
5976309124Sdim{
5977309124Sdim  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
5978309124Sdim             _MM_FROUND_CUR_DIRECTION);
5979309124Sdim}
5980309124Sdim
5981314564Sdim#ifdef __x86_64__
5982341825Sdim#define _mm_cvt_roundsd_u64(A, R) \
5983309124Sdim  (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
5984341825Sdim                                                  (int)(R))
5985309124Sdim
5986341825Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5987309124Sdim_mm_cvtsd_u64 (__m128d __A)
5988309124Sdim{
5989309124Sdim  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
5990309124Sdim                 __A,
5991309124Sdim                 _MM_FROUND_CUR_DIRECTION);
5992309124Sdim}
5993314564Sdim#endif
5994309124Sdim
/* Convert the low float of A to a signed 32-bit integer; R is forwarded as
 * the builtin's rounding-control immediate. Fully parenthesized expansion. */
#define _mm_cvt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
5997309124Sdim
/* Alternate spelling of _mm_cvt_roundss_si32 (identical expansion): low float
 * of A to signed 32-bit integer with rounding control R. Fully parenthesized. */
#define _mm_cvt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
6000309124Sdim
#ifdef __x86_64__
/* Convert the low float of A to a signed 64-bit integer; R is forwarded as
 * the builtin's rounding-control immediate. 64-bit targets only. Both
 * spellings expand identically and are fully parenthesized. */
#define _mm_cvt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
#endif
6008309124Sdim
/* Convert the low float of A to an unsigned 32-bit integer; R is forwarded as
 * the builtin's rounding-control immediate. Fully parenthesized expansion. */
#define _mm_cvt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))
6011309124Sdim
6012341825Sdimstatic __inline__ unsigned __DEFAULT_FN_ATTRS128
6013309124Sdim_mm_cvtss_u32 (__m128 __A)
6014309124Sdim{
6015309124Sdim  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6016309124Sdim             _MM_FROUND_CUR_DIRECTION);
6017309124Sdim}
6018309124Sdim
6019314564Sdim#ifdef __x86_64__
6020341825Sdim#define _mm_cvt_roundss_u64(A, R) \
6021309124Sdim  (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
6022341825Sdim                                                  (int)(R))
6023309124Sdim
6024341825Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6025309124Sdim_mm_cvtss_u64 (__m128 __A)
6026309124Sdim{
6027309124Sdim  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6028309124Sdim                 __A,
6029309124Sdim                 _MM_FROUND_CUR_DIRECTION);
6030309124Sdim}
6031314564Sdim#endif
6032309124Sdim
/* Truncating conversion of the low double of A to a signed 32-bit integer; R
 * is forwarded as the builtin's control immediate. Fully parenthesized. */
#define _mm_cvtt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
6035309124Sdim
/* Alternate spelling of _mm_cvtt_roundsd_i32 (identical expansion):
 * truncating conversion of the low double of A to a signed 32-bit integer.
 * Fully parenthesized expansion. */
#define _mm_cvtt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
6038309124Sdim
6039341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS128
6040309124Sdim_mm_cvttsd_i32 (__m128d __A)
6041309124Sdim{
6042309124Sdim  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6043309124Sdim              _MM_FROUND_CUR_DIRECTION);
6044309124Sdim}
6045309124Sdim
6046314564Sdim#ifdef __x86_64__
6047341825Sdim#define _mm_cvtt_roundsd_si64(A, R) \
6048341825Sdim  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
6049309124Sdim
6050341825Sdim#define _mm_cvtt_roundsd_i64(A, R) \
6051341825Sdim  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
6052309124Sdim
6053341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS128
6054309124Sdim_mm_cvttsd_i64 (__m128d __A)
6055309124Sdim{
6056309124Sdim  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6057309124Sdim              _MM_FROUND_CUR_DIRECTION);
6058309124Sdim}
6059314564Sdim#endif
6060309124Sdim
/* Truncating conversion of the low double of A to an unsigned 32-bit integer;
 * R is forwarded as the builtin's control immediate. Fully parenthesized. */
#define _mm_cvtt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))
6063309124Sdim
6064341825Sdimstatic __inline__ unsigned __DEFAULT_FN_ATTRS128
6065309124Sdim_mm_cvttsd_u32 (__m128d __A)
6066309124Sdim{
6067309124Sdim  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6068309124Sdim              _MM_FROUND_CUR_DIRECTION);
6069309124Sdim}
6070309124Sdim
6071314564Sdim#ifdef __x86_64__
6072341825Sdim#define _mm_cvtt_roundsd_u64(A, R) \
6073309124Sdim  (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
6074341825Sdim                                                   (int)(R))
6075309124Sdim
6076341825Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6077309124Sdim_mm_cvttsd_u64 (__m128d __A)
6078309124Sdim{
6079309124Sdim  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6080309124Sdim                  __A,
6081309124Sdim                  _MM_FROUND_CUR_DIRECTION);
6082309124Sdim}
6083314564Sdim#endif
6084309124Sdim
/* Truncating conversion of the low float of A to a signed 32-bit integer; R
 * is forwarded as the builtin's control immediate. Fully parenthesized. */
#define _mm_cvtt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
6087309124Sdim
/* Alternate spelling of _mm_cvtt_roundss_i32 (identical expansion):
 * truncating conversion of the low float of A to a signed 32-bit integer.
 * Fully parenthesized expansion. */
#define _mm_cvtt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
6090309124Sdim
6091341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS128
6092309124Sdim_mm_cvttss_i32 (__m128 __A)
6093309124Sdim{
6094309124Sdim  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6095309124Sdim              _MM_FROUND_CUR_DIRECTION);
6096309124Sdim}
6097309124Sdim
6098314564Sdim#ifdef __x86_64__
6099341825Sdim#define _mm_cvtt_roundss_i64(A, R) \
6100341825Sdim  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
6101309124Sdim
6102341825Sdim#define _mm_cvtt_roundss_si64(A, R) \
6103341825Sdim  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
6104309124Sdim
6105341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS128
6106309124Sdim_mm_cvttss_i64 (__m128 __A)
6107309124Sdim{
6108309124Sdim  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6109309124Sdim              _MM_FROUND_CUR_DIRECTION);
6110309124Sdim}
6111314564Sdim#endif
6112309124Sdim
/* Truncating conversion of the low float of A to an unsigned 32-bit integer;
 * R is forwarded as the builtin's control immediate. Fully parenthesized. */
#define _mm_cvtt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))
6115309124Sdim
6116341825Sdimstatic __inline__ unsigned __DEFAULT_FN_ATTRS128
6117309124Sdim_mm_cvttss_u32 (__m128 __A)
6118309124Sdim{
6119309124Sdim  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6120309124Sdim              _MM_FROUND_CUR_DIRECTION);
6121309124Sdim}
6122309124Sdim
6123314564Sdim#ifdef __x86_64__
6124341825Sdim#define _mm_cvtt_roundss_u64(A, R) \
6125309124Sdim  (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
6126341825Sdim                                                   (int)(R))
6127309124Sdim
6128341825Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6129309124Sdim_mm_cvttss_u64 (__m128 __A)
6130309124Sdim{
6131309124Sdim  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6132309124Sdim                  __A,
6133309124Sdim                  _MM_FROUND_CUR_DIRECTION);
6134309124Sdim}
6135314564Sdim#endif
6136309124Sdim
/* Permute the doubles of X under control of the immediate C (vpermilpd
 * builtin). The expansion is fully parenthesized so that e.g. a subscript
 * applied to the macro call acts on the __m512d result. */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))
6139309124Sdim
/* Merge-masked _mm512_permute_pd: a lane takes the permuted value when its
 * bit in U is set, otherwise the lane of W. Fully parenthesized expansion. */
#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))
6144309124Sdim
/* Zero-masked _mm512_permute_pd: a lane takes the permuted value when its bit
 * in U is set, otherwise zero. Fully parenthesized expansion. */
#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))
6149309124Sdim
/* Permute the floats of X under control of the immediate C (vpermilps
 * builtin). The expansion is fully parenthesized so that e.g. a subscript
 * applied to the macro call acts on the __m512 result. */
#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))
6152309124Sdim
/* Merge-masked _mm512_permute_ps: a lane takes the permuted value when its
 * bit in U is set, otherwise the lane of W. Fully parenthesized expansion. */
#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)(__m512)(W)))
6157309124Sdim
/* Zero-masked _mm512_permute_ps: a lane takes the permuted value when its bit
 * in U is set, otherwise zero. Fully parenthesized expansion. */
#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps()))
6162309124Sdim
6163341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
6164314564Sdim_mm512_permutevar_pd(__m512d __A, __m512i __C)
6165309124Sdim{
6166314564Sdim  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
6167309124Sdim}
6168309124Sdim
6169341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
6170314564Sdim_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6171309124Sdim{
6172314564Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6173314564Sdim                                         (__v8df)_mm512_permutevar_pd(__A, __C),
6174314564Sdim                                         (__v8df)__W);
6175309124Sdim}
6176309124Sdim
6177341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
6178314564Sdim_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
6179309124Sdim{
6180314564Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6181314564Sdim                                         (__v8df)_mm512_permutevar_pd(__A, __C),
6182314564Sdim                                         (__v8df)_mm512_setzero_pd());
6183309124Sdim}
6184309124Sdim
6185341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
6186314564Sdim_mm512_permutevar_ps(__m512 __A, __m512i __C)
6187309124Sdim{
6188314564Sdim  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
6189309124Sdim}
6190309124Sdim
6191341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
6192314564Sdim_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6193309124Sdim{
6194314564Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6195314564Sdim                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
6196314564Sdim                                        (__v16sf)__W);
6197309124Sdim}
6198309124Sdim
6199341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
6200314564Sdim_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
6201309124Sdim{
6202314564Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6203314564Sdim                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
6204314564Sdim                                        (__v16sf)_mm512_setzero_ps());
6205309124Sdim}
6206309124Sdim
6207341825Sdimstatic __inline __m512d __DEFAULT_FN_ATTRS512
6208309124Sdim_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
6209309124Sdim{
6210341825Sdim  return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
6211341825Sdim                                                 (__v8df)__B);
6212309124Sdim}
6213309124Sdim
6214341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
6215341825Sdim_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
6216309124Sdim{
6217341825Sdim  return (__m512d)__builtin_ia32_selectpd_512(__U,
6218341825Sdim                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6219341825Sdim                                  (__v8df)__A);
6220309124Sdim}
6221309124Sdim
6222341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
6223341825Sdim_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
6224341825Sdim                             __m512d __B)
6225309124Sdim{
6226341825Sdim  return (__m512d)__builtin_ia32_selectpd_512(__U,
6227341825Sdim                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6228341825Sdim                                  (__v8df)(__m512d)__I);
6229309124Sdim}
6230309124Sdim
6231341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
6232341825Sdim_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
6233341825Sdim                             __m512d __B)
6234341825Sdim{
6235341825Sdim  return (__m512d)__builtin_ia32_selectpd_512(__U,
6236341825Sdim                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6237341825Sdim                                  (__v8df)_mm512_setzero_pd());
6238341825Sdim}
6239341825Sdim
6240341825Sdimstatic __inline __m512 __DEFAULT_FN_ATTRS512
6241309124Sdim_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
6242309124Sdim{
6243341825Sdim  return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
6244341825Sdim                                                (__v16sf) __B);
6245309124Sdim}
6246309124Sdim
6247341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
6248341825Sdim_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6249309124Sdim{
6250341825Sdim  return (__m512)__builtin_ia32_selectps_512(__U,
6251341825Sdim                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6252341825Sdim                                 (__v16sf)__A);
6253309124Sdim}
6254309124Sdim
6255341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
6256341825Sdim_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
6257309124Sdim{
6258341825Sdim  return (__m512)__builtin_ia32_selectps_512(__U,
6259341825Sdim                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6260341825Sdim                                 (__v16sf)(__m512)__I);
6261309124Sdim}
6262309124Sdim
6263341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
6264341825Sdim_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
6265341825Sdim{
6266341825Sdim  return (__m512)__builtin_ia32_selectps_512(__U,
6267341825Sdim                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6268341825Sdim                                 (__v16sf)_mm512_setzero_ps());
6269341825Sdim}
6270309124Sdim
6271341825Sdim
/* Truncating conversion of the eight doubles of A to unsigned 32-bit
 * integers (256-bit result); R is forwarded as the builtin's control
 * immediate. Unmasked: pass-through is undefined, mask is all ones. The
 * expansion is fully parenthesized so it behaves as a single expression. */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_undefined_si256(), \
                                              (__mmask8)-1, (int)(R)))
6276309124Sdim
/* Merge-masked _mm512_cvtt_roundpd_epu32: W is the pass-through vector and U
 * the 8-bit write mask handed to the builtin. Fully parenthesized. */
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)(__m256i)(W), \
                                              (__mmask8)(U), (int)(R)))
6281309124Sdim
/* Zero-masked _mm512_cvtt_roundpd_epu32: the pass-through handed to the
 * builtin is all zeros and U is the 8-bit write mask. Fully parenthesized. */
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U), (int)(R)))
6286309124Sdim
6287341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
6288309124Sdim_mm512_cvttpd_epu32 (__m512d __A)
6289309124Sdim{
6290309124Sdim  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6291309124Sdim                  (__v8si)
6292309124Sdim                  _mm256_undefined_si256 (),
6293309124Sdim                  (__mmask8) -1,
6294309124Sdim                  _MM_FROUND_CUR_DIRECTION);
6295309124Sdim}
6296309124Sdim
6297341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
6298309124Sdim_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6299309124Sdim{
6300309124Sdim  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6301309124Sdim                  (__v8si) __W,
6302309124Sdim                  (__mmask8) __U,
6303309124Sdim                  _MM_FROUND_CUR_DIRECTION);
6304309124Sdim}
6305309124Sdim
6306341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
6307309124Sdim_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
6308309124Sdim{
6309309124Sdim  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6310309124Sdim                  (__v8si)
6311309124Sdim                  _mm256_setzero_si256 (),
6312309124Sdim                  (__mmask8) __U,
6313309124Sdim                  _MM_FROUND_CUR_DIRECTION);
6314309124Sdim}
6315309124Sdim
/* Scalar double round-scale of A and B with immediate imm and rounding
 * control R. Unmasked: zero pass-through with an all-ones mask. The expansion
 * is fully parenthesized so it behaves as a single expression. */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 (int)(R)))
6322309124Sdim
/* _mm_roundscale_round_sd with _MM_FROUND_CUR_DIRECTION as the rounding
 * control. Unmasked: zero pass-through with an all-ones mask. Fully
 * parenthesized expansion. */
#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))
6329309124Sdim
/* Merge-masked scalar double round-scale: W is the pass-through and U the
 * write mask handed to the builtin; rounding control is
 * _MM_FROUND_CUR_DIRECTION. Fully parenthesized expansion. */
#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))
6336309124Sdim
/* Merge-masked scalar double round-scale with explicit rounding control R: W
 * is the pass-through and U the write mask handed to the builtin. Fully
 * parenthesized expansion. */
#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))
6343309124Sdim
/* Zero-masked scalar double round-scale: zero pass-through, U as write mask,
 * _MM_FROUND_CUR_DIRECTION as rounding control. Fully parenthesized. */
#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 _MM_FROUND_CUR_DIRECTION))
6350309124Sdim
/* Zero-masked scalar double round-scale with explicit rounding control R:
 * zero pass-through, U as write mask. Fully parenthesized expansion. */
#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))
6357309124Sdim
/* Scalar float round-scale of A and B with immediate imm and rounding control
 * R. Unmasked: zero pass-through with an all-ones mask. The expansion is
 * fully parenthesized so it behaves as a single expression. */
#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)))
6364309124Sdim
/* _mm_roundscale_round_ss with _MM_FROUND_CUR_DIRECTION as the rounding
 * control. Unmasked: zero pass-through with an all-ones mask. Fully
 * parenthesized expansion. */
#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION))
6371309124Sdim
/* Merge-masked scalar float round-scale: W is the pass-through and U the
 * write mask handed to the builtin; rounding control is
 * _MM_FROUND_CUR_DIRECTION. Fully parenthesized expansion. */
#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))
6378309124Sdim
/* Merge-masked scalar float round-scale with explicit rounding control R: W
 * is the pass-through and U the write mask handed to the builtin. Fully
 * parenthesized expansion. */
#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))
6385309124Sdim
/* Zero-masked scalar float round-scale: zero pass-through, U as write mask,
 * _MM_FROUND_CUR_DIRECTION as rounding control. Fully parenthesized. */
#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))
6392309124Sdim
/* Zero-masked scalar float round-scale with explicit rounding control R:
 * zero pass-through, U as write mask. Fully parenthesized expansion. */
#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))
6399309124Sdim
/* Packed double scalef of A by B with rounding control R. Unmasked:
 * undefined pass-through with an all-ones mask. The expansion is fully
 * parenthesized so it behaves as a single expression. */
#define _mm512_scalef_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))
6405309124Sdim
/* Merge-masked packed double scalef with rounding control R: W is the
 * pass-through and U the write mask handed to the builtin. Fully
 * parenthesized expansion. */
#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))
6411309124Sdim
/* Zero-masked packed double scalef with rounding control R: zero
 * pass-through, U as write mask. Fully parenthesized expansion. */
#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
6417309124Sdim
6418341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
6419309124Sdim_mm512_scalef_pd (__m512d __A, __m512d __B)
6420309124Sdim{
6421309124Sdim  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6422309124Sdim                (__v8df) __B,
6423309124Sdim                (__v8df)
6424309124Sdim                _mm512_undefined_pd (),
6425309124Sdim                (__mmask8) -1,
6426309124Sdim                _MM_FROUND_CUR_DIRECTION);
6427309124Sdim}
6428309124Sdim
6429341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
6430309124Sdim_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6431309124Sdim{
6432309124Sdim  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6433309124Sdim                (__v8df) __B,
6434309124Sdim                (__v8df) __W,
6435309124Sdim                (__mmask8) __U,
6436309124Sdim                _MM_FROUND_CUR_DIRECTION);
6437309124Sdim}
6438309124Sdim
6439341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
6440309124Sdim_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6441309124Sdim{
6442309124Sdim  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6443309124Sdim                (__v8df) __B,
6444309124Sdim                (__v8df)
6445309124Sdim                _mm512_setzero_pd (),
6446309124Sdim                (__mmask8) __U,
6447309124Sdim                _MM_FROUND_CUR_DIRECTION);
6448309124Sdim}
6449309124Sdim
/* Packed float scalef of A by B with rounding control R. Unmasked: undefined
 * pass-through with an all-ones mask. The expansion is fully parenthesized
 * so it behaves as a single expression. */
#define _mm512_scalef_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))
6455309124Sdim
/* Merge-masked packed float scalef with rounding control R: W is the
 * pass-through and U the write mask handed to the builtin. Fully
 * parenthesized expansion. */
#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))
6461309124Sdim
/* Zero-masked packed float scalef with rounding control R: zero
 * pass-through, U as write mask. Fully parenthesized expansion. */
#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
6467309124Sdim
6468341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
6469309124Sdim_mm512_scalef_ps (__m512 __A, __m512 __B)
6470309124Sdim{
6471309124Sdim  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6472309124Sdim               (__v16sf) __B,
6473309124Sdim               (__v16sf)
6474309124Sdim               _mm512_undefined_ps (),
6475309124Sdim               (__mmask16) -1,
6476309124Sdim               _MM_FROUND_CUR_DIRECTION);
6477309124Sdim}
6478309124Sdim
6479341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
6480309124Sdim_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6481309124Sdim{
6482309124Sdim  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6483309124Sdim               (__v16sf) __B,
6484309124Sdim               (__v16sf) __W,
6485309124Sdim               (__mmask16) __U,
6486309124Sdim               _MM_FROUND_CUR_DIRECTION);
6487309124Sdim}
6488309124Sdim
6489341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
6490309124Sdim_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6491309124Sdim{
6492309124Sdim  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6493309124Sdim               (__v16sf) __B,
6494309124Sdim               (__v16sf)
6495309124Sdim               _mm512_setzero_ps (),
6496309124Sdim               (__mmask16) __U,
6497309124Sdim               _MM_FROUND_CUR_DIRECTION);
6498309124Sdim}
6499309124Sdim
/* Scalar double scalef of A by B with rounding control R. Unmasked: zero
 * pass-through with an all-ones mask. The expansion is fully parenthesized
 * so it behaves as a single expression. */
#define _mm_scalef_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R)))
6505309124Sdim
6506341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
6507309124Sdim_mm_scalef_sd (__m128d __A, __m128d __B)
6508309124Sdim{
6509309124Sdim  return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6510309124Sdim              (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6511309124Sdim              (__mmask8) -1,
6512309124Sdim              _MM_FROUND_CUR_DIRECTION);
6513309124Sdim}
6514309124Sdim
6515341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
6516309124Sdim_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6517309124Sdim{
6518309124Sdim return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6519309124Sdim                 (__v2df) __B,
6520309124Sdim                (__v2df) __W,
6521309124Sdim                (__mmask8) __U,
6522309124Sdim                _MM_FROUND_CUR_DIRECTION);
6523309124Sdim}
6524309124Sdim
/* Merge-masked scalar double scalef with rounding control R: W is the
 * pass-through and U the write mask handed to the builtin. Fully
 * parenthesized expansion. */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))
6530309124Sdim
6531341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
6532309124Sdim_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6533309124Sdim{
6534309124Sdim return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6535309124Sdim                 (__v2df) __B,
6536309124Sdim                (__v2df) _mm_setzero_pd (),
6537309124Sdim                (__mmask8) __U,
6538309124Sdim                _MM_FROUND_CUR_DIRECTION);
6539309124Sdim}
6540309124Sdim
/* Zero-masking scalar double-precision scalef with explicit rounding
 * argument R: pass-through is _mm_setzero_pd(), U is the mask.
 * The expansion is fully parenthesized so that an operator written after
 * the invocation applies to the cast result. */
#define _mm_maskz_scalef_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))
6546309124Sdim
/* Unmasked scalar single-precision scalef with explicit rounding argument R:
 * mask is -1 and the pass-through operand is _mm_setzero_ps().
 * The expansion is fully parenthesized so that an operator written after
 * the invocation applies to the cast result. */
#define _mm_scalef_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R)))
6552309124Sdim
6553341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
6554309124Sdim_mm_scalef_ss (__m128 __A, __m128 __B)
6555309124Sdim{
6556309124Sdim  return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6557309124Sdim             (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6558309124Sdim             (__mmask8) -1,
6559309124Sdim             _MM_FROUND_CUR_DIRECTION);
6560309124Sdim}
6561309124Sdim
6562341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
6563309124Sdim_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6564309124Sdim{
6565309124Sdim return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6566309124Sdim                (__v4sf) __B,
6567309124Sdim                (__v4sf) __W,
6568309124Sdim                (__mmask8) __U,
6569309124Sdim                _MM_FROUND_CUR_DIRECTION);
6570309124Sdim}
6571309124Sdim
/* Merge-masking scalar single-precision scalef with explicit rounding
 * argument R: W is the pass-through operand, U the mask.
 * The expansion is fully parenthesized so that an operator written after
 * the invocation applies to the cast result. */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))
6577309124Sdim
6578341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
6579309124Sdim_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6580309124Sdim{
6581309124Sdim return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6582309124Sdim                 (__v4sf) __B,
6583309124Sdim                (__v4sf) _mm_setzero_ps (),
6584309124Sdim                (__mmask8) __U,
6585309124Sdim                _MM_FROUND_CUR_DIRECTION);
6586309124Sdim}
6587309124Sdim
/* Zero-masking scalar single-precision scalef with explicit rounding
 * argument R: pass-through is _mm_setzero_ps(), U is the mask.
 * The expansion is fully parenthesized so that an operator written after
 * the invocation applies to the cast result. */
#define _mm_maskz_scalef_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)))
6594309124Sdim
6595341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
6596314564Sdim_mm512_srai_epi32(__m512i __A, int __B)
6597314564Sdim{
6598314564Sdim  return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
6599314564Sdim}
6600309124Sdim
6601341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
6602314564Sdim_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
6603314564Sdim{
6604341825Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6605341825Sdim                                         (__v16si)_mm512_srai_epi32(__A, __B),
6606314564Sdim                                         (__v16si)__W);
6607314564Sdim}
6608309124Sdim
6609341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
6610314564Sdim_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
6611341825Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6612341825Sdim                                         (__v16si)_mm512_srai_epi32(__A, __B),
6613314564Sdim                                         (__v16si)_mm512_setzero_si512());
6614314564Sdim}
6615309124Sdim
6616341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
6617314564Sdim_mm512_srai_epi64(__m512i __A, int __B)
6618314564Sdim{
6619314564Sdim  return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
6620314564Sdim}
6621309124Sdim
6622341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
6623314564Sdim_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
6624314564Sdim{
6625341825Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6626341825Sdim                                          (__v8di)_mm512_srai_epi64(__A, __B),
6627314564Sdim                                          (__v8di)__W);
6628314564Sdim}
6629309124Sdim
6630341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
6631314564Sdim_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
6632314564Sdim{
6633341825Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6634341825Sdim                                          (__v8di)_mm512_srai_epi64(__A, __B),
6635314564Sdim                                          (__v8di)_mm512_setzero_si512());
6636314564Sdim}
6637309124Sdim
/* Forwards A and B (as __v16sf) and the selector imm (as int) to
 * __builtin_ia32_shuf_f32x4.  The expansion is fully parenthesized so that
 * an operator written after the invocation applies to the cast result. */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(B), (int)(imm)))
6641309124Sdim
/* Merge-masking form: __builtin_ia32_selectps_512 chooses per lane, under
 * mask U, between _mm512_shuffle_f32x4(A, B, imm) and W.  The expansion is
 * fully parenthesized so that an operator written after the invocation
 * applies to the cast result. */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)(__m512)(W)))
6646309124Sdim
/* Zero-masking form: __builtin_ia32_selectps_512 chooses per lane, under
 * mask U, between _mm512_shuffle_f32x4(A, B, imm) and _mm512_setzero_ps().
 * The expansion is fully parenthesized so that an operator written after
 * the invocation applies to the cast result. */
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)_mm512_setzero_ps()))
6651309124Sdim
/* Forwards A and B (as __v8df) and the selector imm (as int) to
 * __builtin_ia32_shuf_f64x2.  The expansion is fully parenthesized so that
 * an operator written after the invocation applies to the cast result. */
#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(B), (int)(imm)))
6655309124Sdim
/* Merge-masking form: __builtin_ia32_selectpd_512 chooses per lane, under
 * mask U, between _mm512_shuffle_f64x2(A, B, imm) and W.  The expansion is
 * fully parenthesized so that an operator written after the invocation
 * applies to the cast result. */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)(__m512d)(W)))
6660309124Sdim
/* Zero-masking form: __builtin_ia32_selectpd_512 chooses per lane, under
 * mask U, between _mm512_shuffle_f64x2(A, B, imm) and _mm512_setzero_pd().
 * The expansion is fully parenthesized so that an operator written after
 * the invocation applies to the cast result. */
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)_mm512_setzero_pd()))
6665309124Sdim
/* Forwards A and B (as __v16si) and the selector imm (as int) to
 * __builtin_ia32_shuf_i32x4.  The expansion is fully parenthesized so that
 * an operator written after the invocation applies to the cast result. */
#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(imm)))
6669309124Sdim
/* Merge-masking form: __builtin_ia32_selectd_512 chooses per lane, under
 * mask U, between _mm512_shuffle_i32x4(A, B, imm) and W.  The expansion is
 * fully parenthesized so that an operator written after the invocation
 * applies to the cast result. */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)(__m512i)(W)))
6674309124Sdim
/* Zero-masking form: __builtin_ia32_selectd_512 chooses per lane, under
 * mask U, between _mm512_shuffle_i32x4(A, B, imm) and
 * _mm512_setzero_si512().  The expansion is fully parenthesized so that an
 * operator written after the invocation applies to the cast result. */
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)_mm512_setzero_si512()))
6679309124Sdim
/* Forwards A and B (as __v8di) and the selector imm (as int) to
 * __builtin_ia32_shuf_i64x2.  The expansion is fully parenthesized so that
 * an operator written after the invocation applies to the cast result. */
#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(imm)))
6683309124Sdim
/* Merge-masking form: __builtin_ia32_selectq_512 chooses per lane, under
 * mask U, between _mm512_shuffle_i64x2(A, B, imm) and W.  The expansion is
 * fully parenthesized so that an operator written after the invocation
 * applies to the cast result. */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)(__m512i)(W)))
6688309124Sdim
/* Zero-masking form: __builtin_ia32_selectq_512 chooses per lane, under
 * mask U, between _mm512_shuffle_i64x2(A, B, imm) and
 * _mm512_setzero_si512().  The expansion is fully parenthesized so that an
 * operator written after the invocation applies to the cast result. */
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)_mm512_setzero_si512()))
6693309124Sdim
/* Forwards A and B (as __v8df) and the selector M (as int) to
 * __builtin_ia32_shufpd512.  The expansion is fully parenthesized so that
 * an operator written after the invocation applies to the cast result. */
#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(M)))
6697309124Sdim
/* Merge-masking form: __builtin_ia32_selectpd_512 chooses per lane, under
 * mask U, between _mm512_shuffle_pd(A, B, M) and W.  The expansion is fully
 * parenthesized so that an operator written after the invocation applies
 * to the cast result. */
#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)(__m512d)(W)))
6702309124Sdim
/* Zero-masking form: __builtin_ia32_selectpd_512 chooses per lane, under
 * mask U, between _mm512_shuffle_pd(A, B, M) and _mm512_setzero_pd().  The
 * expansion is fully parenthesized so that an operator written after the
 * invocation applies to the cast result. */
#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)_mm512_setzero_pd()))
6707309124Sdim
/* Forwards A and B (as __v16sf) and the selector M (as int) to
 * __builtin_ia32_shufps512.  The expansion is fully parenthesized so that
 * an operator written after the invocation applies to the cast result. */
#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(M)))
6711309124Sdim
/* Merge-masking form: __builtin_ia32_selectps_512 chooses per lane, under
 * mask U, between _mm512_shuffle_ps(A, B, M) and W.  The expansion is fully
 * parenthesized so that an operator written after the invocation applies
 * to the cast result. */
#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)(__m512)(W)))
6716309124Sdim
/* Zero-masking form: __builtin_ia32_selectps_512 chooses per lane, under
 * mask U, between _mm512_shuffle_ps(A, B, M) and _mm512_setzero_ps().  The
 * expansion is fully parenthesized so that an operator written after the
 * invocation applies to the cast result. */
#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)_mm512_setzero_ps()))
6721309124Sdim
/* Unmasked scalar double-precision sqrt with explicit rounding argument R:
 * mask is -1 and the pass-through operand is _mm_setzero_pd().
 * The expansion is fully parenthesized so that an operator written after
 * the invocation applies to the cast result. */
#define _mm_sqrt_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1, (int)(R)))
6727309124Sdim
6728341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
6729309124Sdim_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6730309124Sdim{
6731309124Sdim return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6732309124Sdim                 (__v2df) __B,
6733309124Sdim                (__v2df) __W,
6734309124Sdim                (__mmask8) __U,
6735309124Sdim                _MM_FROUND_CUR_DIRECTION);
6736309124Sdim}
6737309124Sdim
/* Merge-masking scalar double-precision sqrt with explicit rounding
 * argument R: W is the pass-through operand, U the mask.
 * The expansion is fully parenthesized so that an operator written after
 * the invocation applies to the cast result. */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)(__m128d)(W), \
                                             (__mmask8)(U), (int)(R)))
6743309124Sdim
6744341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
6745309124Sdim_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
6746309124Sdim{
6747309124Sdim return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6748309124Sdim                 (__v2df) __B,
6749309124Sdim                (__v2df) _mm_setzero_pd (),
6750309124Sdim                (__mmask8) __U,
6751309124Sdim                _MM_FROUND_CUR_DIRECTION);
6752309124Sdim}
6753309124Sdim
/* Zero-masking scalar double-precision sqrt with explicit rounding
 * argument R: pass-through is _mm_setzero_pd(), U is the mask.
 * The expansion is fully parenthesized so that an operator written after
 * the invocation applies to the cast result. */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))
6759309124Sdim
/* Unmasked scalar single-precision sqrt with explicit rounding argument R:
 * mask is -1 and the pass-through operand is _mm_setzero_ps().
 * The expansion is fully parenthesized so that an operator written after
 * the invocation applies to the cast result. */
#define _mm_sqrt_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1, (int)(R)))
6765309124Sdim
6766341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
6767309124Sdim_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6768309124Sdim{
6769309124Sdim return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6770309124Sdim                 (__v4sf) __B,
6771309124Sdim                (__v4sf) __W,
6772309124Sdim                (__mmask8) __U,
6773309124Sdim                _MM_FROUND_CUR_DIRECTION);
6774309124Sdim}
6775309124Sdim
/* Merge-masking scalar single-precision sqrt with explicit rounding
 * argument R: W is the pass-through operand, U the mask.
 * The expansion is fully parenthesized so that an operator written after
 * the invocation applies to the cast result. */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U), (int)(R)))
6781309124Sdim
6782341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
6783309124Sdim_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
6784309124Sdim{
6785309124Sdim return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6786309124Sdim                 (__v4sf) __B,
6787309124Sdim                (__v4sf) _mm_setzero_ps (),
6788309124Sdim                (__mmask8) __U,
6789309124Sdim                _MM_FROUND_CUR_DIRECTION);
6790309124Sdim}
6791309124Sdim
/* Zero-masking scalar single-precision sqrt with explicit rounding
 * argument R: pass-through is _mm_setzero_ps(), U is the mask.
 * The expansion is fully parenthesized so that an operator written after
 * the invocation applies to the cast result. */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
6797309124Sdim
6798341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
6799321369Sdim_mm512_broadcast_f32x4(__m128 __A)
6800309124Sdim{
6801321369Sdim  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6802321369Sdim                                         0, 1, 2, 3, 0, 1, 2, 3,
6803321369Sdim                                         0, 1, 2, 3, 0, 1, 2, 3);
6804309124Sdim}
6805309124Sdim
6806341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
6807321369Sdim_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
6808309124Sdim{
6809321369Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6810321369Sdim                                           (__v16sf)_mm512_broadcast_f32x4(__A),
6811321369Sdim                                           (__v16sf)__O);
6812309124Sdim}
6813309124Sdim
6814341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
6815321369Sdim_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
6816309124Sdim{
6817321369Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6818321369Sdim                                           (__v16sf)_mm512_broadcast_f32x4(__A),
6819321369Sdim                                           (__v16sf)_mm512_setzero_ps());
6820309124Sdim}
6821309124Sdim
6822341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
6823321369Sdim_mm512_broadcast_f64x4(__m256d __A)
6824309124Sdim{
6825321369Sdim  return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6826321369Sdim                                          0, 1, 2, 3, 0, 1, 2, 3);
6827309124Sdim}
6828309124Sdim
6829341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
6830321369Sdim_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
6831309124Sdim{
6832321369Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6833321369Sdim                                            (__v8df)_mm512_broadcast_f64x4(__A),
6834321369Sdim                                            (__v8df)__O);
6835309124Sdim}
6836309124Sdim
6837341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
6838321369Sdim_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
6839309124Sdim{
6840321369Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6841321369Sdim                                            (__v8df)_mm512_broadcast_f64x4(__A),
6842321369Sdim                                            (__v8df)_mm512_setzero_pd());
6843309124Sdim}
6844309124Sdim
6845341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
6846321369Sdim_mm512_broadcast_i32x4(__m128i __A)
6847309124Sdim{
6848321369Sdim  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6849321369Sdim                                          0, 1, 2, 3, 0, 1, 2, 3,
6850321369Sdim                                          0, 1, 2, 3, 0, 1, 2, 3);
6851309124Sdim}
6852309124Sdim
6853341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
6854321369Sdim_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
6855309124Sdim{
6856321369Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6857321369Sdim                                           (__v16si)_mm512_broadcast_i32x4(__A),
6858321369Sdim                                           (__v16si)__O);
6859309124Sdim}
6860309124Sdim
6861341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
6862321369Sdim_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
6863309124Sdim{
6864321369Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6865321369Sdim                                           (__v16si)_mm512_broadcast_i32x4(__A),
6866321369Sdim                                           (__v16si)_mm512_setzero_si512());
6867309124Sdim}
6868309124Sdim
6869341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
6870321369Sdim_mm512_broadcast_i64x4(__m256i __A)
6871309124Sdim{
6872321369Sdim  return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6873321369Sdim                                          0, 1, 2, 3, 0, 1, 2, 3);
6874309124Sdim}
6875309124Sdim
6876341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
6877321369Sdim_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
6878309124Sdim{
6879321369Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6880321369Sdim                                            (__v8di)_mm512_broadcast_i64x4(__A),
6881321369Sdim                                            (__v8di)__O);
6882309124Sdim}
6883309124Sdim
6884341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
6885321369Sdim_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
6886309124Sdim{
6887321369Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6888321369Sdim                                            (__v8di)_mm512_broadcast_i64x4(__A),
6889321369Sdim                                            (__v8di)_mm512_setzero_si512());
6890309124Sdim}
6891309124Sdim
6892341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
6893309124Sdim_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
6894309124Sdim{
6895309124Sdim  return (__m512d)__builtin_ia32_selectpd_512(__M,
6896309124Sdim                                              (__v8df) _mm512_broadcastsd_pd(__A),
6897309124Sdim                                              (__v8df) __O);
6898309124Sdim}
6899309124Sdim
6900341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
6901309124Sdim_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
6902309124Sdim{
6903309124Sdim  return (__m512d)__builtin_ia32_selectpd_512(__M,
6904309124Sdim                                              (__v8df) _mm512_broadcastsd_pd(__A),
6905309124Sdim                                              (__v8df) _mm512_setzero_pd());
6906309124Sdim}
6907309124Sdim
6908341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
6909309124Sdim_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
6910309124Sdim{
6911309124Sdim  return (__m512)__builtin_ia32_selectps_512(__M,
6912309124Sdim                                             (__v16sf) _mm512_broadcastss_ps(__A),
6913309124Sdim                                             (__v16sf) __O);
6914309124Sdim}
6915309124Sdim
6916341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
6917309124Sdim_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
6918309124Sdim{
6919309124Sdim  return (__m512)__builtin_ia32_selectps_512(__M,
6920309124Sdim                                             (__v16sf) _mm512_broadcastss_ps(__A),
6921309124Sdim                                             (__v16sf) _mm512_setzero_ps());
6922309124Sdim}
6923309124Sdim
6924341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
6925309124Sdim_mm512_cvtsepi32_epi8 (__m512i __A)
6926309124Sdim{
6927309124Sdim  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6928309124Sdim               (__v16qi) _mm_undefined_si128 (),
6929309124Sdim               (__mmask16) -1);
6930309124Sdim}
6931309124Sdim
6932341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
6933309124Sdim_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6934309124Sdim{
6935309124Sdim  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6936309124Sdim               (__v16qi) __O, __M);
6937309124Sdim}
6938309124Sdim
6939341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
6940309124Sdim_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
6941309124Sdim{
6942309124Sdim  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6943309124Sdim               (__v16qi) _mm_setzero_si128 (),
6944309124Sdim               __M);
6945309124Sdim}
6946309124Sdim
6947341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
6948309124Sdim_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
6949309124Sdim{
6950309124Sdim  __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6951309124Sdim}
6952309124Sdim
6953341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
6954309124Sdim_mm512_cvtsepi32_epi16 (__m512i __A)
6955309124Sdim{
6956309124Sdim  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6957309124Sdim               (__v16hi) _mm256_undefined_si256 (),
6958309124Sdim               (__mmask16) -1);
6959309124Sdim}
6960309124Sdim
6961341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
6962309124Sdim_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6963309124Sdim{
6964309124Sdim  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6965309124Sdim               (__v16hi) __O, __M);
6966309124Sdim}
6967309124Sdim
6968341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
6969309124Sdim_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
6970309124Sdim{
6971309124Sdim  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6972309124Sdim               (__v16hi) _mm256_setzero_si256 (),
6973309124Sdim               __M);
6974309124Sdim}
6975309124Sdim
6976341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
6977309124Sdim_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
6978309124Sdim{
6979309124Sdim  __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6980309124Sdim}
6981309124Sdim
6982341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
6983309124Sdim_mm512_cvtsepi64_epi8 (__m512i __A)
6984309124Sdim{
6985309124Sdim  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6986309124Sdim               (__v16qi) _mm_undefined_si128 (),
6987309124Sdim               (__mmask8) -1);
6988309124Sdim}
6989309124Sdim
6990341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
6991309124Sdim_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6992309124Sdim{
6993309124Sdim  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6994309124Sdim               (__v16qi) __O, __M);
6995309124Sdim}
6996309124Sdim
6997341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
6998309124Sdim_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
6999309124Sdim{
7000309124Sdim  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7001309124Sdim               (__v16qi) _mm_setzero_si128 (),
7002309124Sdim               __M);
7003309124Sdim}
7004309124Sdim
7005341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
7006309124Sdim_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7007309124Sdim{
7008309124Sdim  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7009309124Sdim}
7010309124Sdim
7011341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
7012309124Sdim_mm512_cvtsepi64_epi32 (__m512i __A)
7013309124Sdim{
7014309124Sdim  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7015309124Sdim               (__v8si) _mm256_undefined_si256 (),
7016309124Sdim               (__mmask8) -1);
7017309124Sdim}
7018309124Sdim
7019341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
7020309124Sdim_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7021309124Sdim{
7022309124Sdim  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7023309124Sdim               (__v8si) __O, __M);
7024309124Sdim}
7025309124Sdim
7026341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
7027309124Sdim_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
7028309124Sdim{
7029309124Sdim  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7030309124Sdim               (__v8si) _mm256_setzero_si256 (),
7031309124Sdim               __M);
7032309124Sdim}
7033309124Sdim
7034341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
7035309124Sdim_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
7036309124Sdim{
7037309124Sdim  __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7038309124Sdim}
7039309124Sdim
7040341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7041309124Sdim_mm512_cvtsepi64_epi16 (__m512i __A)
7042309124Sdim{
7043309124Sdim  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7044309124Sdim               (__v8hi) _mm_undefined_si128 (),
7045309124Sdim               (__mmask8) -1);
7046309124Sdim}
7047309124Sdim
7048341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7049309124Sdim_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7050309124Sdim{
7051309124Sdim  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7052309124Sdim               (__v8hi) __O, __M);
7053309124Sdim}
7054309124Sdim
7055341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7056309124Sdim_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
7057309124Sdim{
7058309124Sdim  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7059309124Sdim               (__v8hi) _mm_setzero_si128 (),
7060309124Sdim               __M);
7061309124Sdim}
7062309124Sdim
7063341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
7064309124Sdim_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
7065309124Sdim{
7066309124Sdim  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7067309124Sdim}
7068309124Sdim
7069341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7070309124Sdim_mm512_cvtusepi32_epi8 (__m512i __A)
7071309124Sdim{
7072309124Sdim  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7073309124Sdim                (__v16qi) _mm_undefined_si128 (),
7074309124Sdim                (__mmask16) -1);
7075309124Sdim}
7076309124Sdim
7077341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7078309124Sdim_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7079309124Sdim{
7080309124Sdim  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7081309124Sdim                (__v16qi) __O,
7082309124Sdim                __M);
7083309124Sdim}
7084309124Sdim
7085341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7086309124Sdim_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
7087309124Sdim{
7088309124Sdim  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7089309124Sdim                (__v16qi) _mm_setzero_si128 (),
7090309124Sdim                __M);
7091309124Sdim}
7092309124Sdim
7093341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
7094309124Sdim_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7095309124Sdim{
7096309124Sdim  __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7097309124Sdim}
7098309124Sdim
7099341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
7100309124Sdim_mm512_cvtusepi32_epi16 (__m512i __A)
7101309124Sdim{
7102309124Sdim  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7103309124Sdim                (__v16hi) _mm256_undefined_si256 (),
7104309124Sdim                (__mmask16) -1);
7105309124Sdim}
7106309124Sdim
7107341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
7108309124Sdim_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7109309124Sdim{
7110309124Sdim  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7111309124Sdim                (__v16hi) __O,
7112309124Sdim                __M);
7113309124Sdim}
7114309124Sdim
7115341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
7116309124Sdim_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
7117309124Sdim{
7118309124Sdim  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7119309124Sdim                (__v16hi) _mm256_setzero_si256 (),
7120309124Sdim                __M);
7121309124Sdim}
7122309124Sdim
7123341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
7124309124Sdim_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7125309124Sdim{
7126309124Sdim  __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7127309124Sdim}
7128309124Sdim
7129341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7130309124Sdim_mm512_cvtusepi64_epi8 (__m512i __A)
7131309124Sdim{
7132309124Sdim  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7133309124Sdim                (__v16qi) _mm_undefined_si128 (),
7134309124Sdim                (__mmask8) -1);
7135309124Sdim}
7136309124Sdim
7137341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7138309124Sdim_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7139309124Sdim{
7140309124Sdim  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7141309124Sdim                (__v16qi) __O,
7142309124Sdim                __M);
7143309124Sdim}
7144309124Sdim
7145341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7146309124Sdim_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
7147309124Sdim{
7148309124Sdim  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7149309124Sdim                (__v16qi) _mm_setzero_si128 (),
7150309124Sdim                __M);
7151309124Sdim}
7152309124Sdim
7153341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
7154309124Sdim_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7155309124Sdim{
7156309124Sdim  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7157309124Sdim}
7158309124Sdim
7159341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
7160309124Sdim_mm512_cvtusepi64_epi32 (__m512i __A)
7161309124Sdim{
7162309124Sdim  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7163309124Sdim                (__v8si) _mm256_undefined_si256 (),
7164309124Sdim                (__mmask8) -1);
7165309124Sdim}
7166309124Sdim
7167341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
7168309124Sdim_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7169309124Sdim{
7170309124Sdim  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7171309124Sdim                (__v8si) __O, __M);
7172309124Sdim}
7173309124Sdim
7174341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
7175309124Sdim_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
7176309124Sdim{
7177309124Sdim  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7178309124Sdim                (__v8si) _mm256_setzero_si256 (),
7179309124Sdim                __M);
7180309124Sdim}
7181309124Sdim
7182341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
7183309124Sdim_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7184309124Sdim{
7185309124Sdim  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7186309124Sdim}
7187309124Sdim
7188341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7189309124Sdim_mm512_cvtusepi64_epi16 (__m512i __A)
7190309124Sdim{
7191309124Sdim  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7192309124Sdim                (__v8hi) _mm_undefined_si128 (),
7193309124Sdim                (__mmask8) -1);
7194309124Sdim}
7195309124Sdim
7196341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7197309124Sdim_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7198309124Sdim{
7199309124Sdim  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7200309124Sdim                (__v8hi) __O, __M);
7201309124Sdim}
7202309124Sdim
7203341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7204309124Sdim_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
7205309124Sdim{
7206309124Sdim  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7207309124Sdim                (__v8hi) _mm_setzero_si128 (),
7208309124Sdim                __M);
7209309124Sdim}
7210309124Sdim
7211341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
7212309124Sdim_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7213309124Sdim{
7214309124Sdim  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7215309124Sdim}
7216309124Sdim
7217341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7218309124Sdim_mm512_cvtepi32_epi8 (__m512i __A)
7219309124Sdim{
7220309124Sdim  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7221309124Sdim              (__v16qi) _mm_undefined_si128 (),
7222309124Sdim              (__mmask16) -1);
7223309124Sdim}
7224309124Sdim
7225341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7226309124Sdim_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7227309124Sdim{
7228309124Sdim  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7229309124Sdim              (__v16qi) __O, __M);
7230309124Sdim}
7231309124Sdim
7232341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7233309124Sdim_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
7234309124Sdim{
7235309124Sdim  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7236309124Sdim              (__v16qi) _mm_setzero_si128 (),
7237309124Sdim              __M);
7238309124Sdim}
7239309124Sdim
7240341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
7241309124Sdim_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7242309124Sdim{
7243309124Sdim  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7244309124Sdim}
7245309124Sdim
7246341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
7247309124Sdim_mm512_cvtepi32_epi16 (__m512i __A)
7248309124Sdim{
7249309124Sdim  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7250309124Sdim              (__v16hi) _mm256_undefined_si256 (),
7251309124Sdim              (__mmask16) -1);
7252309124Sdim}
7253309124Sdim
7254341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
7255309124Sdim_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7256309124Sdim{
7257309124Sdim  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7258309124Sdim              (__v16hi) __O, __M);
7259309124Sdim}
7260309124Sdim
7261341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
7262309124Sdim_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
7263309124Sdim{
7264309124Sdim  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7265309124Sdim              (__v16hi) _mm256_setzero_si256 (),
7266309124Sdim              __M);
7267309124Sdim}
7268309124Sdim
7269341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
7270309124Sdim_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
7271309124Sdim{
7272309124Sdim  __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7273309124Sdim}
7274309124Sdim
7275341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7276309124Sdim_mm512_cvtepi64_epi8 (__m512i __A)
7277309124Sdim{
7278309124Sdim  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7279309124Sdim              (__v16qi) _mm_undefined_si128 (),
7280309124Sdim              (__mmask8) -1);
7281309124Sdim}
7282309124Sdim
7283341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7284309124Sdim_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7285309124Sdim{
7286309124Sdim  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7287309124Sdim              (__v16qi) __O, __M);
7288309124Sdim}
7289309124Sdim
7290341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7291309124Sdim_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
7292309124Sdim{
7293309124Sdim  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7294309124Sdim              (__v16qi) _mm_setzero_si128 (),
7295309124Sdim              __M);
7296309124Sdim}
7297309124Sdim
7298341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
7299309124Sdim_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7300309124Sdim{
7301309124Sdim  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7302309124Sdim}
7303309124Sdim
7304341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
7305309124Sdim_mm512_cvtepi64_epi32 (__m512i __A)
7306309124Sdim{
7307309124Sdim  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7308309124Sdim              (__v8si) _mm256_undefined_si256 (),
7309309124Sdim              (__mmask8) -1);
7310309124Sdim}
7311309124Sdim
7312341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
7313309124Sdim_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7314309124Sdim{
7315309124Sdim  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7316309124Sdim              (__v8si) __O, __M);
7317309124Sdim}
7318309124Sdim
7319341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS512
7320309124Sdim_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
7321309124Sdim{
7322309124Sdim  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7323309124Sdim              (__v8si) _mm256_setzero_si256 (),
7324309124Sdim              __M);
7325309124Sdim}
7326309124Sdim
7327341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
7328309124Sdim_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7329309124Sdim{
7330309124Sdim  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7331309124Sdim}
7332309124Sdim
7333341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7334309124Sdim_mm512_cvtepi64_epi16 (__m512i __A)
7335309124Sdim{
7336309124Sdim  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7337309124Sdim              (__v8hi) _mm_undefined_si128 (),
7338309124Sdim              (__mmask8) -1);
7339309124Sdim}
7340309124Sdim
7341341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7342309124Sdim_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7343309124Sdim{
7344309124Sdim  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7345309124Sdim              (__v8hi) __O, __M);
7346309124Sdim}
7347309124Sdim
7348341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS512
7349309124Sdim_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
7350309124Sdim{
7351309124Sdim  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7352309124Sdim              (__v8hi) _mm_setzero_si128 (),
7353309124Sdim              __M);
7354309124Sdim}
7355309124Sdim
7356341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
7357309124Sdim_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7358309124Sdim{
7359309124Sdim  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7360309124Sdim}
7361309124Sdim
/* Unmasked form: calls the extracti32x4 builtin on A with selector imm, an
   undefined pass-through vector and an all-ones mask. The expansion is fully
   parenthesized so the result cast applies to the whole call even when the
   macro is followed by a postfix operator. */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), \
                                             (int)(imm), \
                                             (__v4si)_mm_undefined_si128(), \
                                             (__mmask8)-1))
7366309124Sdim
/* Merge-masked form: calls the extracti32x4 builtin on A with selector imm,
   pass-through vector W and mask U. The expansion is fully parenthesized so
   the result cast applies to the whole call even when the macro is followed
   by a postfix operator. */
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), \
                                             (int)(imm), \
                                             (__v4si)(__m128i)(W), \
                                             (__mmask8)(U)))
7371309124Sdim
/* Zero-masked form: calls the extracti32x4 builtin on A with selector imm, an
   all-zero pass-through vector and mask U. The expansion is fully
   parenthesized so the result cast applies to the whole call even when the
   macro is followed by a postfix operator. */
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), \
                                             (int)(imm), \
                                             (__v4si)_mm_setzero_si128(), \
                                             (__mmask8)(U)))
7376309124Sdim
/* Unmasked form: calls the extracti64x4 builtin on A with selector imm, an
   undefined pass-through vector and an all-ones mask. The expansion is fully
   parenthesized so the result cast applies to the whole call even when the
   macro is followed by a postfix operator. */
#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), \
                                             (int)(imm), \
                                             (__v4di)_mm256_undefined_si256(), \
                                             (__mmask8)-1))
7381309124Sdim
/* Merge-masked form: calls the extracti64x4 builtin on A with selector imm,
   pass-through vector W and mask U. The expansion is fully parenthesized so
   the result cast applies to the whole call even when the macro is followed
   by a postfix operator. */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), \
                                             (int)(imm), \
                                             (__v4di)(__m256i)(W), \
                                             (__mmask8)(U)))
7386309124Sdim
/* Zero-masked form: calls the extracti64x4 builtin on A with selector imm, an
   all-zero pass-through vector and mask U. The expansion is fully
   parenthesized so the result cast applies to the whole call even when the
   macro is followed by a postfix operator. */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), \
                                             (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))
7391309124Sdim
/* Calls the insertf64x4 builtin with the 512-bit vector A, the 256-bit vector
   B and selector imm. The expansion is fully parenthesized so the result cast
   applies to the whole call even when the macro is followed by a postfix
   operator. */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                       (__v4df)(__m256d)(B), (int)(imm)))
7395309124Sdim
/* Merge-masked form: selects per mask U between _mm512_insertf64x4(A, B, imm)
   and W via the selectpd_512 builtin. The expansion is fully parenthesized so
   the result cast applies to the whole call even when the macro is followed
   by a postfix operator. */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                   (__v8df)(__m512d)(W)))
7400309124Sdim
/* Zero-masked form: selects per mask U between _mm512_insertf64x4(A, B, imm)
   and an all-zero vector via the selectpd_512 builtin. The expansion is fully
   parenthesized so the result cast applies to the whole call even when the
   macro is followed by a postfix operator. */
#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                   (__v8df)_mm512_setzero_pd()))
7405309124Sdim
/* Calls the inserti64x4 builtin with the 512-bit vector A, the 256-bit vector
   B and selector imm. The expansion is fully parenthesized so the result cast
   applies to the whole call even when the macro is followed by a postfix
   operator. */
#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                       (__v4di)(__m256i)(B), (int)(imm)))
7409309124Sdim
/* Merge-masked form: selects per mask U between _mm512_inserti64x4(A, B, imm)
   and W via the selectq_512 builtin. The expansion is fully parenthesized so
   the result cast applies to the whole call even when the macro is followed
   by a postfix operator. */
#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                   (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                   (__v8di)(__m512i)(W)))
7414309124Sdim
/* Zero-masked form: selects per mask U between _mm512_inserti64x4(A, B, imm)
   and an all-zero vector via the selectq_512 builtin. The expansion is fully
   parenthesized so the result cast applies to the whole call even when the
   macro is followed by a postfix operator. */
#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                   (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                   (__v8di)_mm512_setzero_si512()))
7419309124Sdim
/* Calls the insertf32x4 builtin with the 512-bit vector A, the 128-bit vector
   B and selector imm. The expansion is fully parenthesized so the result cast
   applies to the whole call even when the macro is followed by a postfix
   operator. */
#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                      (__v4sf)(__m128)(B), (int)(imm)))
7423309124Sdim
/* Merge-masked form: selects per mask U between _mm512_insertf32x4(A, B, imm)
   and W via the selectps_512 builtin. The expansion is fully parenthesized so
   the result cast applies to the whole call even when the macro is followed
   by a postfix operator. */
#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                  (__v16sf)(__m512)(W)))
7428309124Sdim
/* Zero-masked form: selects per mask U between _mm512_insertf32x4(A, B, imm)
   and an all-zero vector via the selectps_512 builtin. The expansion is fully
   parenthesized so the result cast applies to the whole call even when the
   macro is followed by a postfix operator. */
#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                  (__v16sf)_mm512_setzero_ps()))
7433309124Sdim
/* Calls the inserti32x4 builtin with the 512-bit vector A, the 128-bit vector
   B and selector imm. The expansion is fully parenthesized so the result cast
   applies to the whole call even when the macro is followed by a postfix
   operator. */
#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                       (__v4si)(__m128i)(B), (int)(imm)))
7437309124Sdim
/* Merge-masked form: selects per mask U between _mm512_inserti32x4(A, B, imm)
   and W via the selectd_512 builtin. The expansion is fully parenthesized so
   the result cast applies to the whole call even when the macro is followed
   by a postfix operator. */
#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                  (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                  (__v16si)(__m512i)(W)))
7442309124Sdim
/* Zero-masked form: selects per mask U between _mm512_inserti32x4(A, B, imm)
   and an all-zero vector via the selectd_512 builtin. The expansion is fully
   parenthesized so the result cast applies to the whole call even when the
   macro is followed by a postfix operator. */
#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                  (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                  (__v16si)_mm512_setzero_si512()))
7447309124Sdim
/* Unmasked form: calls the getmantpd512 builtin on A with B and C packed into
   one immediate as (C << 2) | B, an undefined pass-through vector, an
   all-ones mask and rounding argument R. The expansion is fully parenthesized
   so the result cast applies to the whole call even when the macro is
   followed by a postfix operator. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, (int)(R)))
7453309124Sdim
/* Merge-masked form: calls the getmantpd512 builtin on A with B and C packed
   into one immediate as (C << 2) | B, pass-through vector W, mask U and
   rounding argument R. The expansion is fully parenthesized so the result
   cast applies to the whole call even when the macro is followed by a postfix
   operator. */
#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))
7459309124Sdim
/* Zero-masked form: calls the getmantpd512 builtin on A with B and C packed
   into one immediate as (C << 2) | B, an all-zero pass-through vector, mask U
   and rounding argument R. The expansion is fully parenthesized so the result
   cast applies to the whole call even when the macro is followed by a postfix
   operator. */
#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))
7465309124Sdim
/* Unmasked form: calls the getmantpd512 builtin on A with B and C packed into
   one immediate as (C << 2) | B, an all-zero pass-through vector, an all-ones
   mask and _MM_FROUND_CUR_DIRECTION. The expansion is fully parenthesized so
   the result cast applies to the whole call even when the macro is followed
   by a postfix operator. */
#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION))
7472309124Sdim
/* Merge-masked form: calls the getmantpd512 builtin on A with B and C packed
   into one immediate as (C << 2) | B, pass-through vector W, mask U and
   _MM_FROUND_CUR_DIRECTION. The expansion is fully parenthesized so the
   result cast applies to the whole call even when the macro is followed by a
   postfix operator. */
#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))
7479309124Sdim
/* Zero-masked form: calls the getmantpd512 builtin on A with B and C packed
   into one immediate as (C << 2) | B, an all-zero pass-through vector, mask U
   and _MM_FROUND_CUR_DIRECTION. The expansion is fully parenthesized so the
   result cast applies to the whole call even when the macro is followed by a
   postfix operator. */
#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))
7486309124Sdim
/* Unmasked form: calls the getmantps512 builtin on A with B and C packed into
   one immediate as (C << 2) | B, an undefined pass-through vector, an
   all-ones mask and rounding argument R. The expansion is fully parenthesized
   so the result cast applies to the whole call even when the macro is
   followed by a postfix operator. */
#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))
7492309124Sdim
/* Merge-masked form: calls the getmantps512 builtin on A with B and C packed
   into one immediate as (C << 2) | B, pass-through vector W, mask U and
   rounding argument R. The expansion is fully parenthesized so the result
   cast applies to the whole call even when the macro is followed by a postfix
   operator. */
#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))
7498309124Sdim
/* Zero-masked form: calls the getmantps512 builtin on A with B and C packed
   into one immediate as (C << 2) | B, an all-zero pass-through vector, mask U
   and rounding argument R. The expansion is fully parenthesized so the result
   cast applies to the whole call even when the macro is followed by a postfix
   operator. */
#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
7504309124Sdim
/* Unmasked form: calls the getmantps512 builtin on A with B and C packed into
   one immediate as (C << 2) | B, an undefined pass-through vector, an
   all-ones mask and _MM_FROUND_CUR_DIRECTION. The expansion is fully
   parenthesized so the result cast applies to the whole call even when the
   macro is followed by a postfix operator. */
#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION))
7511309124Sdim
/* Merge-masked form: calls the getmantps512 builtin on A with B and C packed
   into one immediate as (C << 2) | B, pass-through vector W, mask U and
   _MM_FROUND_CUR_DIRECTION. The expansion is fully parenthesized so the
   result cast applies to the whole call even when the macro is followed by a
   postfix operator. */
#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
7518309124Sdim
/* Zero-masked form: calls the getmantps512 builtin on A with B and C packed
   into one immediate as (C << 2) | B, an all-zero pass-through vector, mask U
   and _MM_FROUND_CUR_DIRECTION. The expansion is fully parenthesized so the
   result cast applies to the whole call even when the macro is followed by a
   postfix operator. */
#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
7525309124Sdim
/* Unmasked form: calls the getexppd512 builtin on A with an undefined
   pass-through vector, an all-ones mask and rounding argument R. The
   expansion is fully parenthesized so the result cast applies to the whole
   call even when the macro is followed by a postfix operator. */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))
7530309124Sdim
/* Merge-masked form: calls the getexppd512 builtin on A with pass-through
   vector W, mask U and rounding argument R. The expansion is fully
   parenthesized so the result cast applies to the whole call even when the
   macro is followed by a postfix operator. */
#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))
7535309124Sdim
/* Zero-masked form: calls the getexppd512 builtin on A with an all-zero
   pass-through vector, mask U and rounding argument R. The expansion is fully
   parenthesized so the result cast applies to the whole call even when the
   macro is followed by a postfix operator. */
#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
7540309124Sdim
7541341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
7542309124Sdim_mm512_getexp_pd (__m512d __A)
7543309124Sdim{
7544309124Sdim  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7545309124Sdim                (__v8df) _mm512_undefined_pd (),
7546309124Sdim                (__mmask8) -1,
7547309124Sdim                _MM_FROUND_CUR_DIRECTION);
7548309124Sdim}
7549309124Sdim
7550341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
7551309124Sdim_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7552309124Sdim{
7553309124Sdim  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7554309124Sdim                (__v8df) __W,
7555309124Sdim                (__mmask8) __U,
7556309124Sdim                _MM_FROUND_CUR_DIRECTION);
7557309124Sdim}
7558309124Sdim
7559341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
7560309124Sdim_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
7561309124Sdim{
7562309124Sdim  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7563309124Sdim                (__v8df) _mm512_setzero_pd (),
7564309124Sdim                (__mmask8) __U,
7565309124Sdim                _MM_FROUND_CUR_DIRECTION);
7566309124Sdim}
7567309124Sdim
/* Unmasked form: calls the getexpps512 builtin on A with an undefined
   pass-through vector, an all-ones mask and rounding argument R. The
   expansion is fully parenthesized so the result cast applies to the whole
   call even when the macro is followed by a postfix operator. */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))
7572309124Sdim
/* Merge-masked form: calls the getexpps512 builtin on A with pass-through
   vector W, mask U and rounding argument R. The expansion is fully
   parenthesized so the result cast applies to the whole call even when the
   macro is followed by a postfix operator. */
#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))
7577309124Sdim
/* Zero-masked form: calls the getexpps512 builtin on A with an all-zero
   pass-through vector, mask U and rounding argument R. The expansion is fully
   parenthesized so the result cast applies to the whole call even when the
   macro is followed by a postfix operator. */
#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
7582309124Sdim
7583341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
7584309124Sdim_mm512_getexp_ps (__m512 __A)
7585309124Sdim{
7586309124Sdim  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7587309124Sdim               (__v16sf) _mm512_undefined_ps (),
7588309124Sdim               (__mmask16) -1,
7589309124Sdim               _MM_FROUND_CUR_DIRECTION);
7590309124Sdim}
7591309124Sdim
7592341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
7593309124Sdim_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
7594309124Sdim{
7595309124Sdim  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7596309124Sdim               (__v16sf) __W,
7597309124Sdim               (__mmask16) __U,
7598309124Sdim               _MM_FROUND_CUR_DIRECTION);
7599309124Sdim}
7600309124Sdim
7601341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
7602309124Sdim_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
7603309124Sdim{
7604309124Sdim  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7605309124Sdim               (__v16sf) _mm512_setzero_ps (),
7606309124Sdim               (__mmask16) __U,
7607309124Sdim               _MM_FROUND_CUR_DIRECTION);
7608309124Sdim}
7609309124Sdim
/* Unmasked gather: calls the gatherdiv16sf builtin with an undefined source
   vector, base pointer addr, the 64-bit index vector, an all-ones mask and
   scale. The expansion is fully parenthesized so the result cast applies to
   the whole call even when the macro is followed by a postfix operator. */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)-1, (int)(scale)))
7615309124Sdim
/* Masked gather: calls the gatherdiv16sf builtin with source vector v1_old,
   base pointer addr, the 64-bit index vector, mask and scale. The expansion
   is fully parenthesized so the result cast applies to the whole call even
   when the macro is followed by a postfix operator. */
#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7621309124Sdim
/* Unmasked gather: calls the gatherdiv16si builtin with an undefined source
   vector, base pointer addr, the 64-bit index vector, an all-ones mask and
   scale. The expansion is fully parenthesized so the result cast applies to
   the whole call even when the macro is followed by a postfix operator. */
#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale)))
7627309124Sdim
/* Masked gather: calls the gatherdiv16si builtin with source vector v1_old,
   base pointer addr, the 64-bit index vector, mask and scale. The expansion
   is fully parenthesized so the result cast applies to the whole call even
   when the macro is followed by a postfix operator. */
#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7633309124Sdim
/* Unmasked gather: calls the gatherdiv8df builtin with an undefined source
   vector, base pointer addr, the 64-bit index vector, an all-ones mask and
   scale. The expansion is fully parenthesized so the result cast applies to
   the whole call even when the macro is followed by a postfix operator. */
#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)-1, (int)(scale)))
7639309124Sdim
/* Masked gather: calls the gatherdiv8df builtin with source vector v1_old,
   base pointer addr, the 64-bit index vector, mask and scale. The expansion
   is fully parenthesized so the result cast applies to the whole call even
   when the macro is followed by a postfix operator. */
#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7645309124Sdim
/* Unmasked gather: calls the gatherdiv8di builtin with an undefined source
   vector, base pointer addr, the 64-bit index vector, an all-ones mask and
   scale. The expansion is fully parenthesized so the result cast applies to
   the whole call even when the macro is followed by a postfix operator. */
#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)-1, (int)(scale)))
7651309124Sdim
/* Masked gather: calls the gatherdiv8di builtin with source vector v1_old,
   base pointer addr, the 64-bit index vector, mask and scale. The expansion
   is fully parenthesized so the result cast applies to the whole call even
   when the macro is followed by a postfix operator. */
#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7657309124Sdim
/* Unmasked gather: calls the gathersiv16sf builtin with an undefined source
   vector, base pointer addr, the 32-bit index vector, an all-ones mask and
   scale. index is routed through __m512i (an integer vector) rather than the
   float type __m512 before being viewed as __v16si; both are same-size
   bit-casts, so existing callers are unaffected. The expansion is fully
   parenthesized so the result cast applies to the whole call even when the
   macro is followed by a postfix operator. */
#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)-1, (int)(scale)))
7663309124Sdim
/* Masked gather: calls the gathersiv16sf builtin with source vector v1_old,
   base pointer addr, the 32-bit index vector, mask and scale. index is routed
   through __m512i (an integer vector) rather than the float type __m512
   before being viewed as __v16si; both are same-size bit-casts, so existing
   callers are unaffected. The expansion is fully parenthesized so the result
   cast applies to the whole call even when the macro is followed by a postfix
   operator. */
#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)(mask), (int)(scale)))
7669309124Sdim
/* Unmasked gather: calls the gathersiv16si builtin with an undefined source
   vector, base pointer addr, the 32-bit index vector, an all-ones mask and
   scale. The expansion is fully parenthesized so the result cast applies to
   the whole call even when the macro is followed by a postfix operator. */
#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale)))
7675309124Sdim
/* Masked gather: calls the gathersiv16si builtin with source vector v1_old,
   base pointer addr, the 32-bit index vector, mask and scale. The expansion
   is fully parenthesized so the result cast applies to the whole call even
   when the macro is followed by a postfix operator. */
#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale)))
7681309124Sdim
/* Unmasked gather: calls the gathersiv8df builtin with an undefined source
   vector, base pointer addr, the 256-bit vector of 32-bit indices, an
   all-ones mask and scale. The expansion is fully parenthesized so the result
   cast applies to the whole call even when the macro is followed by a postfix
   operator. */
#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)-1, (int)(scale)))
7687309124Sdim
/* Masked gather: calls the gathersiv8df builtin with source vector v1_old,
   base pointer addr, the 256-bit vector of 32-bit indices, mask and scale.
   The expansion is fully parenthesized so the result cast applies to the
   whole call even when the macro is followed by a postfix operator. */
#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7693309124Sdim
/* Unmasked gather: calls the gathersiv8di builtin with an undefined source
   vector, base pointer addr, the 256-bit vector of 32-bit indices, an
   all-ones mask and scale. The expansion is fully parenthesized so the result
   cast applies to the whole call even when the macro is followed by a postfix
   operator. */
#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)-1, (int)(scale)))
7699309124Sdim
/* Masked gather: calls the gathersiv8di builtin with source vector v1_old,
   base pointer addr, the 256-bit vector of 32-bit indices, mask and scale.
   The expansion is fully parenthesized so the result cast applies to the
   whole call even when the macro is followed by a postfix operator. */
#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7705309124Sdim
/* Unmasked scatter: forwards addr, the 64-bit index vector and the 256-bit
   float vector v1 to the scatterdiv16sf builtin with an all-ones mask and the
   given scale. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), \
                                (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), \
                                (int)(scale))
7710309124Sdim
/* Masked scatter through __builtin_ia32_scatterdiv16sf.
 *   addr  - destination base, passed as void *
 *   mask  - __mmask8 write-mask handed to the builtin
 *   index - __m512i reinterpreted as eight 64-bit indices (__v8di)
 *   v1    - __m256 reinterpreted as __v8sf
 *   scale - passed through as int */
#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale))
7715309124Sdim
/* Scatter through __builtin_ia32_scatterdiv16si with every mask bit set.
 *   addr  - destination base, passed as void *
 *   index - __m512i reinterpreted as eight 64-bit indices (__v8di)
 *   v1    - __m256i reinterpreted as __v8si
 *   scale - passed through as int */
#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale))
7720309124Sdim
/* Masked scatter through __builtin_ia32_scatterdiv16si.
 *   addr  - destination base, passed as void *
 *   mask  - __mmask8 write-mask handed to the builtin
 *   index - __m512i reinterpreted as eight 64-bit indices (__v8di)
 *   v1    - __m256i reinterpreted as __v8si
 *   scale - passed through as int */
#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale))
7725309124Sdim
/* Scatter through __builtin_ia32_scatterdiv8df with every mask bit set.
 *   addr  - destination base, passed as void *
 *   index - __m512i reinterpreted as eight 64-bit indices (__v8di)
 *   v1    - __m512d reinterpreted as eight doubles (__v8df)
 *   scale - passed through as int */
#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale))
7730309124Sdim
/* Masked scatter through __builtin_ia32_scatterdiv8df.
 *   addr  - destination base, passed as void *
 *   mask  - __mmask8 write-mask handed to the builtin
 *   index - __m512i reinterpreted as eight 64-bit indices (__v8di)
 *   v1    - __m512d reinterpreted as eight doubles (__v8df)
 *   scale - passed through as int */
#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale))
7735309124Sdim
/* Scatter through __builtin_ia32_scatterdiv8di with every mask bit set.
 *   addr  - destination base, passed as void *
 *   index - __m512i reinterpreted as eight 64-bit indices (__v8di)
 *   v1    - __m512i reinterpreted as eight 64-bit integers (__v8di)
 *   scale - passed through as int */
#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale))
7740309124Sdim
/* Masked scatter through __builtin_ia32_scatterdiv8di.
 *   addr  - destination base, passed as void *
 *   mask  - __mmask8 write-mask handed to the builtin
 *   index - __m512i reinterpreted as eight 64-bit indices (__v8di)
 *   v1    - __m512i reinterpreted as eight 64-bit integers (__v8di)
 *   scale - passed through as int */
#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale))
7745309124Sdim
/* Scatter through __builtin_ia32_scattersiv16sf with every mask bit set.
 *   addr  - destination base, passed as void *
 *   index - __m512i reinterpreted as sixteen 32-bit indices (__v16si)
 *   v1    - __m512 reinterpreted as sixteen floats (__v16sf)
 *   scale - passed through as int */
#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf( \
      (void *)(addr), \
      (__mmask16)-1, \
      (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), \
      (int)(scale))
7750309124Sdim
/* Masked scatter through __builtin_ia32_scattersiv16sf.
 *   addr  - destination base, passed as void *
 *   mask  - __mmask16 write-mask handed to the builtin
 *   index - __m512i reinterpreted as sixteen 32-bit indices (__v16si)
 *   v1    - __m512 reinterpreted as sixteen floats (__v16sf)
 *   scale - passed through as int */
#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf( \
      (void *)(addr), \
      (__mmask16)(mask), \
      (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), \
      (int)(scale))
7755309124Sdim
/* Scatter through __builtin_ia32_scattersiv16si with every mask bit set.
 *   addr  - destination base, passed as void *
 *   index - __m512i reinterpreted as sixteen 32-bit indices (__v16si)
 *   v1    - __m512i reinterpreted as sixteen 32-bit integers (__v16si)
 *   scale - passed through as int */
#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si( \
      (void *)(addr), \
      (__mmask16)-1, \
      (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), \
      (int)(scale))
7760309124Sdim
/* Masked scatter through __builtin_ia32_scattersiv16si.
 *   addr  - destination base, passed as void *
 *   mask  - __mmask16 write-mask handed to the builtin
 *   index - __m512i reinterpreted as sixteen 32-bit indices (__v16si)
 *   v1    - __m512i reinterpreted as sixteen 32-bit integers (__v16si)
 *   scale - passed through as int */
#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si( \
      (void *)(addr), \
      (__mmask16)(mask), \
      (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), \
      (int)(scale))
7765309124Sdim
/* Scatter through __builtin_ia32_scattersiv8df with every mask bit set.
 *   addr  - destination base, passed as void *
 *   index - __m256i reinterpreted as __v8si (32-bit indices)
 *   v1    - __m512d reinterpreted as eight doubles (__v8df)
 *   scale - passed through as int */
#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale))
7770309124Sdim
/* Masked scatter through __builtin_ia32_scattersiv8df.
 *   addr  - destination base, passed as void *
 *   mask  - __mmask8 write-mask handed to the builtin
 *   index - __m256i reinterpreted as __v8si (32-bit indices)
 *   v1    - __m512d reinterpreted as eight doubles (__v8df)
 *   scale - passed through as int */
#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale))
7775309124Sdim
/* Scatter through __builtin_ia32_scattersiv8di with every mask bit set.
 *   addr  - destination base, passed as void *
 *   index - __m256i reinterpreted as __v8si (32-bit indices)
 *   v1    - __m512i reinterpreted as eight 64-bit integers (__v8di)
 *   scale - passed through as int */
#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale))
7780309124Sdim
/* Masked scatter through __builtin_ia32_scattersiv8di.
 *   addr  - destination base, passed as void *
 *   mask  - __mmask8 write-mask handed to the builtin
 *   index - __m256i reinterpreted as __v8si (32-bit indices)
 *   v1    - __m512i reinterpreted as eight 64-bit integers (__v8di)
 *   scale - passed through as int */
#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale))
7785309124Sdim
7786341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
7787309124Sdim_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7788309124Sdim{
7789341825Sdim  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7790341825Sdim                                       (__v4sf)__A,
7791341825Sdim                                       (__v4sf)__B,
7792341825Sdim                                       (__mmask8)__U,
7793341825Sdim                                       _MM_FROUND_CUR_DIRECTION);
7794309124Sdim}
7795309124Sdim
/* Scalar single-precision fmadd of (A, B, C) with an all-ones mask
 * ((__mmask8)-1) and an explicit rounding immediate R, passed as int.
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_fmadd_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))
7801341825Sdim
/* Scalar single-precision fmadd of (W, A, B) under mask U with an explicit
 * rounding immediate R, passed as int.
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))
7807309124Sdim
7808341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
7809309124Sdim_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7810309124Sdim{
7811341825Sdim  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7812341825Sdim                                        (__v4sf)__B,
7813341825Sdim                                        (__v4sf)__C,
7814341825Sdim                                        (__mmask8)__U,
7815341825Sdim                                        _MM_FROUND_CUR_DIRECTION);
7816309124Sdim}
7817309124Sdim
/* Scalar single-precision fmadd of (A, B, C) through the maskz builtin with
 * mask U and an explicit rounding immediate R, passed as int.
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))
7823309124Sdim
7824341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
7825309124Sdim_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7826309124Sdim{
7827341825Sdim  return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7828341825Sdim                                        (__v4sf)__X,
7829341825Sdim                                        (__v4sf)__Y,
7830341825Sdim                                        (__mmask8)__U,
7831341825Sdim                                        _MM_FROUND_CUR_DIRECTION);
7832309124Sdim}
7833309124Sdim
/* Scalar single-precision fmadd of (W, X, Y) through the mask3 builtin with
 * mask U and an explicit rounding immediate R, passed as int.
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                          (__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
7839309124Sdim
7840341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
7841309124Sdim_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7842309124Sdim{
7843341825Sdim  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7844341825Sdim                                       (__v4sf)__A,
7845341825Sdim                                       -(__v4sf)__B,
7846341825Sdim                                       (__mmask8)__U,
7847341825Sdim                                       _MM_FROUND_CUR_DIRECTION);
7848309124Sdim}
7849309124Sdim
/* Scalar single-precision fmsub with an all-ones mask and rounding immediate
 * R, expressed with the fmadd builtin by negating the third operand:
 * (A, B, -C).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_fmsub_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))
7855341825Sdim
/* Scalar single-precision fmsub under mask U with rounding immediate R,
 * expressed with the fmadd builtin by negating the third operand:
 * (W, A, -B).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))
7861309124Sdim
7862341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
7863309124Sdim_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7864309124Sdim{
7865341825Sdim  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7866341825Sdim                                        (__v4sf)__B,
7867341825Sdim                                        -(__v4sf)__C,
7868341825Sdim                                        (__mmask8)__U,
7869341825Sdim                                        _MM_FROUND_CUR_DIRECTION);
7870309124Sdim}
7871309124Sdim
/* Scalar single-precision fmsub through the maskz fmadd builtin with mask U
 * and rounding immediate R, obtained by negating the third operand:
 * (A, B, -C).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))
7877309124Sdim
7878341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
7879309124Sdim_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7880309124Sdim{
7881341825Sdim  return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7882341825Sdim                                        (__v4sf)__X,
7883341825Sdim                                        (__v4sf)__Y,
7884341825Sdim                                        (__mmask8)__U,
7885341825Sdim                                        _MM_FROUND_CUR_DIRECTION);
7886309124Sdim}
7887309124Sdim
/* Scalar single-precision fmsub, mask3 form, with rounding immediate R.
 * Calls the dedicated __builtin_ia32_vfmsubss3_mask3 with un-negated
 * operands (W, X, Y) and mask U.
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
                                          (__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
7893309124Sdim
7894341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
7895309124Sdim_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7896309124Sdim{
7897341825Sdim  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7898341825Sdim                                       -(__v4sf)__A,
7899341825Sdim                                       (__v4sf)__B,
7900341825Sdim                                       (__mmask8)__U,
7901341825Sdim                                       _MM_FROUND_CUR_DIRECTION);
7902309124Sdim}
7903309124Sdim
/* Scalar single-precision fnmadd with an all-ones mask and rounding immediate
 * R, expressed with the fmadd builtin by negating the second operand:
 * (A, -B, C).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_fnmadd_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))
7909341825Sdim
/* Scalar single-precision fnmadd under mask U with rounding immediate R,
 * expressed with the fmadd builtin by negating the second operand:
 * (W, -A, B).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         -(__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))
7915309124Sdim
7916341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
7917309124Sdim_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7918309124Sdim{
7919341825Sdim  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7920341825Sdim                                        -(__v4sf)__B,
7921341825Sdim                                        (__v4sf)__C,
7922341825Sdim                                        (__mmask8)__U,
7923341825Sdim                                        _MM_FROUND_CUR_DIRECTION);
7924309124Sdim}
7925309124Sdim
/* Scalar single-precision fnmadd through the maskz fmadd builtin with mask U
 * and rounding immediate R, obtained by negating the second operand:
 * (A, -B, C).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          -(__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))
7931309124Sdim
7932341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
7933309124Sdim_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7934309124Sdim{
7935341825Sdim  return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7936341825Sdim                                        -(__v4sf)__X,
7937341825Sdim                                        (__v4sf)__Y,
7938341825Sdim                                        (__mmask8)__U,
7939341825Sdim                                        _MM_FROUND_CUR_DIRECTION);
7940309124Sdim}
7941309124Sdim
/* Scalar single-precision fnmadd, mask3 form, with mask U and rounding
 * immediate R, expressed with the mask3 fmadd builtin by negating the second
 * operand: (W, -X, Y).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                          -(__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
7947309124Sdim
7948341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
7949309124Sdim_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7950309124Sdim{
7951341825Sdim  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7952341825Sdim                                       -(__v4sf)__A,
7953341825Sdim                                       -(__v4sf)__B,
7954341825Sdim                                       (__mmask8)__U,
7955341825Sdim                                       _MM_FROUND_CUR_DIRECTION);
7956309124Sdim}
7957309124Sdim
/* Scalar single-precision fnmsub with an all-ones mask and rounding immediate
 * R, expressed with the fmadd builtin by negating the second and third
 * operands: (A, -B, -C).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_fnmsub_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))
7963341825Sdim
/* Scalar single-precision fnmsub under mask U with rounding immediate R,
 * expressed with the fmadd builtin by negating the second and third
 * operands: (W, -A, -B).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         -(__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))
7969309124Sdim
7970341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
7971309124Sdim_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7972309124Sdim{
7973341825Sdim  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7974341825Sdim                                        -(__v4sf)__B,
7975341825Sdim                                        -(__v4sf)__C,
7976341825Sdim                                        (__mmask8)__U,
7977341825Sdim                                        _MM_FROUND_CUR_DIRECTION);
7978309124Sdim}
7979309124Sdim
/* Scalar single-precision fnmsub through the maskz fmadd builtin with mask U
 * and rounding immediate R, obtained by negating the second and third
 * operands: (A, -B, -C).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          -(__v4sf)(__m128)(B), \
                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))
7985309124Sdim
7986341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
7987309124Sdim_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7988309124Sdim{
7989341825Sdim  return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7990341825Sdim                                        -(__v4sf)__X,
7991341825Sdim                                        (__v4sf)__Y,
7992341825Sdim                                        (__mmask8)__U,
7993341825Sdim                                        _MM_FROUND_CUR_DIRECTION);
7994309124Sdim}
7995309124Sdim
/* Scalar single-precision fnmsub, mask3 form, with mask U and rounding
 * immediate R.  Calls the dedicated __builtin_ia32_vfmsubss3_mask3 with only
 * the second operand negated: (W, -X, Y).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
                                          -(__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
8001309124Sdim
8002341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
8003309124Sdim_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8004309124Sdim{
8005341825Sdim  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8006341825Sdim                                       (__v2df)__A,
8007341825Sdim                                       (__v2df)__B,
8008341825Sdim                                       (__mmask8)__U,
8009341825Sdim                                       _MM_FROUND_CUR_DIRECTION);
8010309124Sdim}
8011309124Sdim
/* Scalar double-precision fmadd of (A, B, C) with an all-ones mask
 * ((__mmask8)-1) and an explicit rounding immediate R, passed as int.
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_fmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))
8017341825Sdim
/* Scalar double-precision fmadd of (W, A, B) under mask U with an explicit
 * rounding immediate R, passed as int.
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
8023309124Sdim
8024341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
8025309124Sdim_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8026309124Sdim{
8027341825Sdim  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8028341825Sdim                                        (__v2df)__B,
8029341825Sdim                                        (__v2df)__C,
8030341825Sdim                                        (__mmask8)__U,
8031341825Sdim                                        _MM_FROUND_CUR_DIRECTION);
8032309124Sdim}
8033309124Sdim
/* Scalar double-precision fmadd of (A, B, C) through the maskz builtin with
 * mask U and an explicit rounding immediate R, passed as int.
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
                                           (int)(R)))
8039309124Sdim
8040341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
8041309124Sdim_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8042309124Sdim{
8043341825Sdim  return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8044341825Sdim                                        (__v2df)__X,
8045341825Sdim                                        (__v2df)__Y,
8046341825Sdim                                        (__mmask8)__U,
8047341825Sdim                                        _MM_FROUND_CUR_DIRECTION);
8048309124Sdim}
8049309124Sdim
/* Scalar double-precision fmadd of (W, X, Y) through the mask3 builtin with
 * mask U and an explicit rounding immediate R, passed as int.
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                           (__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                           (int)(R)))
8055309124Sdim
8056341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
8057309124Sdim_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8058309124Sdim{
8059341825Sdim  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8060341825Sdim                                       (__v2df)__A,
8061341825Sdim                                       -(__v2df)__B,
8062341825Sdim                                       (__mmask8)__U,
8063341825Sdim                                       _MM_FROUND_CUR_DIRECTION);
8064309124Sdim}
8065309124Sdim
/* Scalar double-precision fmsub with an all-ones mask and rounding immediate
 * R, expressed with the fmadd builtin by negating the third operand:
 * (A, B, -C).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_fmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))
8071341825Sdim
/* Scalar double-precision fmsub under mask U with rounding immediate R,
 * expressed with the fmadd builtin by negating the third operand:
 * (W, A, -B).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
8077309124Sdim
8078341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
8079309124Sdim_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8080309124Sdim{
8081341825Sdim  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8082341825Sdim                                        (__v2df)__B,
8083341825Sdim                                        -(__v2df)__C,
8084341825Sdim                                        (__mmask8)__U,
8085341825Sdim                                        _MM_FROUND_CUR_DIRECTION);
8086309124Sdim}
8087309124Sdim
/* Scalar double-precision fmsub through the maskz fmadd builtin with mask U
 * and rounding immediate R, obtained by negating the third operand:
 * (A, B, -C).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           -(__v2df)(__m128d)(C), \
                                           (__mmask8)(U), (int)(R)))
8093309124Sdim
8094341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
8095309124Sdim_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8096309124Sdim{
8097341825Sdim  return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8098341825Sdim                                        (__v2df)__X,
8099341825Sdim                                        (__v2df)__Y,
8100341825Sdim                                        (__mmask8)__U,
8101341825Sdim                                        _MM_FROUND_CUR_DIRECTION);
8102309124Sdim}
8103309124Sdim
/* Scalar double-precision fmsub, mask3 form, with rounding immediate R.
 * Calls the dedicated __builtin_ia32_vfmsubsd3_mask3 with un-negated
 * operands (W, X, Y) and mask U.
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                           (__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), \
                                           (__mmask8)(U), (int)(R)))
8109309124Sdim
8110341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
8111309124Sdim_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8112309124Sdim{
8113341825Sdim  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8114341825Sdim                                       -(__v2df)__A,
8115341825Sdim                                       (__v2df)__B,
8116341825Sdim                                       (__mmask8)__U,
8117341825Sdim                                       _MM_FROUND_CUR_DIRECTION);
8118309124Sdim}
8119309124Sdim
/* Scalar double-precision fnmadd with an all-ones mask and rounding immediate
 * R, expressed with the fmadd builtin by negating the second operand:
 * (A, -B, C).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))
8125341825Sdim
/* Scalar double-precision fnmadd under mask U with rounding immediate R,
 * expressed with the fmadd builtin by negating the second operand:
 * (W, -A, B).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
8131309124Sdim
8132341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
8133309124Sdim_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8134309124Sdim{
8135341825Sdim  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8136341825Sdim                                        -(__v2df)__B,
8137341825Sdim                                        (__v2df)__C,
8138341825Sdim                                        (__mmask8)__U,
8139341825Sdim                                        _MM_FROUND_CUR_DIRECTION);
8140309124Sdim}
8141309124Sdim
/* Scalar double-precision fnmadd through the maskz fmadd builtin with mask U
 * and rounding immediate R, obtained by negating the second operand:
 * (A, -B, C).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           -(__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
                                           (int)(R)))
8147309124Sdim
8148341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
8149309124Sdim_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8150309124Sdim{
8151341825Sdim  return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8152341825Sdim                                        -(__v2df)__X,
8153341825Sdim                                        (__v2df)__Y,
8154341825Sdim                                        (__mmask8)__U,
8155341825Sdim                                        _MM_FROUND_CUR_DIRECTION);
8156309124Sdim}
8157309124Sdim
/* Scalar double-precision fnmadd, mask3 form, with mask U and rounding
 * immediate R, expressed with the mask3 fmadd builtin by negating the second
 * operand: (W, -X, Y).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                           -(__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                           (int)(R)))
8163309124Sdim
8164341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
8165309124Sdim_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8166309124Sdim{
8167341825Sdim  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8168341825Sdim                                       -(__v2df)__A,
8169341825Sdim                                       -(__v2df)__B,
8170341825Sdim                                       (__mmask8)__U,
8171341825Sdim                                       _MM_FROUND_CUR_DIRECTION);
8172309124Sdim}
8173309124Sdim
/* Scalar double-precision fnmsub with an all-ones mask and rounding immediate
 * R, expressed with the fmadd builtin by negating the second and third
 * operands: (A, -B, -C).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))
8179341825Sdim
/* Scalar double-precision fnmsub under mask U with rounding immediate R,
 * expressed with the fmadd builtin by negating the second and third
 * operands: (W, -A, -B).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
8185309124Sdim
8186341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
8187309124Sdim_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8188309124Sdim{
8189341825Sdim  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8190341825Sdim                                        -(__v2df)__B,
8191341825Sdim                                        -(__v2df)__C,
8192341825Sdim                                        (__mmask8)__U,
8193341825Sdim                                        _MM_FROUND_CUR_DIRECTION);
8194309124Sdim}
8195309124Sdim
/* Scalar double-precision fnmsub through the maskz fmadd builtin with mask U
 * and rounding immediate R, obtained by negating the second and third
 * operands: (A, -B, -C).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           -(__v2df)(__m128d)(B), \
                                           -(__v2df)(__m128d)(C), \
                                           (__mmask8)(U), \
                                           (int)(R)))
8202309124Sdim
8203341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
8204309124Sdim_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8205309124Sdim{
8206341825Sdim  return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8207341825Sdim                                        -(__v2df)__X,
8208341825Sdim                                        (__v2df)__Y,
8209341825Sdim                                        (__mmask8)__U,
8210341825Sdim                                        _MM_FROUND_CUR_DIRECTION);
8211309124Sdim}
8212309124Sdim
/* Scalar double-precision fnmsub, mask3 form, with mask U and rounding
 * immediate R.  Calls the dedicated __builtin_ia32_vfmsubsd3_mask3 with only
 * the second operand negated: (W, -X, Y).
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                           -(__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), \
                                           (__mmask8)(U), (int)(R)))
8218309124Sdim
/* Permutes the eight doubles of X through __builtin_ia32_permdf512, with the
 * control C passed as int.
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))
8221309124Sdim
/* Combines _mm512_permutex_pd(X, C) with W through
 * __builtin_ia32_selectpd_512 under the 8-bit mask U.
 * NOTE(review): presumably set mask bits take the permuted element and clear
 * bits keep the element of W - confirm against the select builtin.
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))
8226309124Sdim
/* Combines _mm512_permutex_pd(X, C) with _mm512_setzero_pd() through
 * __builtin_ia32_selectpd_512 under the 8-bit mask U.
 * NOTE(review): presumably set mask bits take the permuted element and clear
 * bits yield zero - confirm against the select builtin.
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))
8231309124Sdim
/* Permutes the eight 64-bit integers of X through __builtin_ia32_permdi512,
 * with the control C passed as int.
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))
8234309124Sdim
/* Combines _mm512_permutex_epi64(X, C) with W through
 * __builtin_ia32_selectq_512 under the 8-bit mask U.
 * NOTE(review): presumably set mask bits take the permuted element and clear
 * bits keep the element of W - confirm against the select builtin.
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)(__m512i)(W)))
8239309124Sdim
/* Combines _mm512_permutex_epi64(X, C) with _mm512_setzero_si512() through
 * __builtin_ia32_selectq_512 under the 8-bit mask U.
 * NOTE(review): presumably set mask bits take the permuted element and clear
 * bits yield zero - confirm against the select builtin.
 * The expansion is wrapped in parentheses so that postfix operators applied
 * to the macro result act on the cast value, not on the builtin call. */
#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)_mm512_setzero_si512()))
8244309124Sdim
8245341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
8246309124Sdim_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
8247309124Sdim{
8248341825Sdim  return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
8249309124Sdim}
8250309124Sdim
8251341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
8252309124Sdim_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
8253309124Sdim{
8254341825Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8255341825Sdim                                        (__v8df)_mm512_permutexvar_pd(__X, __Y),
8256341825Sdim                                        (__v8df)__W);
8257309124Sdim}
8258309124Sdim
8259341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
8260309124Sdim_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
8261309124Sdim{
8262341825Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8263341825Sdim                                        (__v8df)_mm512_permutexvar_pd(__X, __Y),
8264341825Sdim                                        (__v8df)_mm512_setzero_pd());
8265309124Sdim}
8266309124Sdim
8267341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8268341825Sdim_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
8269309124Sdim{
8270341825Sdim  return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
8271309124Sdim}
8272309124Sdim
8273341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8274341825Sdim_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
8275309124Sdim{
8276341825Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8277341825Sdim                                     (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8278341825Sdim                                     (__v8di)_mm512_setzero_si512());
8279309124Sdim}
8280309124Sdim
8281341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8282309124Sdim_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8283309124Sdim             __m512i __Y)
8284309124Sdim{
8285341825Sdim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8286341825Sdim                                     (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8287341825Sdim                                     (__v8di)__W);
8288309124Sdim}
8289309124Sdim
8290341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
8291309124Sdim_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8292309124Sdim{
8293341825Sdim  return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
8294309124Sdim}
8295309124Sdim
8296341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
8297309124Sdim_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8298309124Sdim{
8299341825Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8300341825Sdim                                       (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8301341825Sdim                                       (__v16sf)__W);
8302309124Sdim}
8303309124Sdim
8304341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
8305309124Sdim_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
8306309124Sdim{
8307341825Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8308341825Sdim                                       (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8309341825Sdim                                       (__v16sf)_mm512_setzero_ps());
8310309124Sdim}
8311309124Sdim
8312341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8313341825Sdim_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8314309124Sdim{
8315341825Sdim  return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
8316309124Sdim}
8317309124Sdim
8318341825Sdim#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8319341825Sdim
8320341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8321341825Sdim_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
8322309124Sdim{
8323341825Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8324341825Sdim                                    (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8325341825Sdim                                    (__v16si)_mm512_setzero_si512());
8326309124Sdim}
8327309124Sdim
8328341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8329309124Sdim_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8330309124Sdim             __m512i __Y)
8331309124Sdim{
8332341825Sdim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8333341825Sdim                                    (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8334341825Sdim                                    (__v16si)__W);
8335309124Sdim}
8336309124Sdim
8337321369Sdim#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8338321369Sdim
8339344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS
8340309124Sdim_mm512_kand (__mmask16 __A, __mmask16 __B)
8341309124Sdim{
8342309124Sdim  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8343309124Sdim}
8344309124Sdim
8345344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS
8346309124Sdim_mm512_kandn (__mmask16 __A, __mmask16 __B)
8347309124Sdim{
8348309124Sdim  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8349309124Sdim}
8350309124Sdim
8351344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS
8352309124Sdim_mm512_kor (__mmask16 __A, __mmask16 __B)
8353309124Sdim{
8354309124Sdim  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8355309124Sdim}
8356309124Sdim
8357344779Sdimstatic __inline__ int __DEFAULT_FN_ATTRS
8358309124Sdim_mm512_kortestc (__mmask16 __A, __mmask16 __B)
8359309124Sdim{
8360309124Sdim  return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8361309124Sdim}
8362309124Sdim
8363344779Sdimstatic __inline__ int __DEFAULT_FN_ATTRS
8364309124Sdim_mm512_kortestz (__mmask16 __A, __mmask16 __B)
8365309124Sdim{
8366309124Sdim  return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8367309124Sdim}
8368309124Sdim
8369344779Sdimstatic __inline__ unsigned char __DEFAULT_FN_ATTRS
8370344779Sdim_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
8371344779Sdim{
8372344779Sdim  return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8373344779Sdim}
8374344779Sdim
8375344779Sdimstatic __inline__ unsigned char __DEFAULT_FN_ATTRS
8376344779Sdim_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
8377344779Sdim{
8378344779Sdim  return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8379344779Sdim}
8380344779Sdim
8381344779Sdimstatic __inline__ unsigned char __DEFAULT_FN_ATTRS
8382344779Sdim_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
8383344779Sdim  *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8384344779Sdim  return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8385344779Sdim}
8386344779Sdim
8387344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS
8388309124Sdim_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
8389309124Sdim{
8390329410Sdim  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8391309124Sdim}
8392309124Sdim
8393344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS
8394309124Sdim_mm512_kxnor (__mmask16 __A, __mmask16 __B)
8395309124Sdim{
8396309124Sdim  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8397309124Sdim}
8398309124Sdim
8399344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS
8400309124Sdim_mm512_kxor (__mmask16 __A, __mmask16 __B)
8401309124Sdim{
8402309124Sdim  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8403309124Sdim}
8404309124Sdim
/* _k*_mask16 spellings: each is an object-like alias that expands to the
   matching _mm512_k* name, so it can be used anywhere that name can. */
#define _kand_mask16  _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _kor_mask16   _mm512_kor
#define _knot_mask16  _mm512_knot
#define _kxor_mask16  _mm512_kxor
#define _kxnor_mask16 _mm512_kxnor
8411344779Sdim
/* Passes mask A and the count I (converted to unsigned int) to
   __builtin_ia32_kshiftlihi.  The expansion is fully parenthesized so it is
   a single primary expression at the use site. */
#define _kshiftli_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)))
8414344779Sdim
/* Passes mask A and the count I (converted to unsigned int) to
   __builtin_ia32_kshiftrihi.  The expansion is fully parenthesized so it is
   a single primary expression at the use site. */
#define _kshiftri_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)))
8417344779Sdim
8418344779Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS
8419344779Sdim_cvtmask16_u32(__mmask16 __A) {
8420344779Sdim  return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
8421344779Sdim}
8422344779Sdim
8423344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS
8424344779Sdim_cvtu32_mask16(unsigned int __A) {
8425344779Sdim  return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
8426344779Sdim}
8427344779Sdim
8428344779Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS
8429344779Sdim_load_mask16(__mmask16 *__A) {
8430344779Sdim  return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
8431344779Sdim}
8432344779Sdim
8433344779Sdimstatic __inline__ void __DEFAULT_FN_ATTRS
8434344779Sdim_store_mask16(__mmask16 *__A, __mmask16 __B) {
8435344779Sdim  *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
8436344779Sdim}
8437344779Sdim
8438341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
8439360784Sdim_mm512_stream_si512 (void * __P, __m512i __A)
8440309124Sdim{
8441322320Sdim  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8442322320Sdim  __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8443309124Sdim}
8444309124Sdim
8445341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8446327952Sdim_mm512_stream_load_si512 (void const *__P)
8447309124Sdim{
8448322320Sdim  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8449322320Sdim  return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
8450309124Sdim}
8451309124Sdim
8452341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
8453360784Sdim_mm512_stream_pd (void *__P, __m512d __A)
8454309124Sdim{
8455322320Sdim  typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8456322320Sdim  __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8457309124Sdim}
8458309124Sdim
8459341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
8460360784Sdim_mm512_stream_ps (void *__P, __m512 __A)
8461309124Sdim{
8462322320Sdim  typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8463322320Sdim  __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8464309124Sdim}
8465309124Sdim
8466341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
8467309124Sdim_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8468309124Sdim{
8469309124Sdim  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8470309124Sdim                  (__v8df) __W,
8471309124Sdim                  (__mmask8) __U);
8472309124Sdim}
8473309124Sdim
8474341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
8475309124Sdim_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
8476309124Sdim{
8477309124Sdim  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8478309124Sdim                  (__v8df)
8479309124Sdim                  _mm512_setzero_pd (),
8480309124Sdim                  (__mmask8) __U);
8481309124Sdim}
8482309124Sdim
8483341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8484309124Sdim_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8485309124Sdim{
8486309124Sdim  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8487309124Sdim                  (__v8di) __W,
8488309124Sdim                  (__mmask8) __U);
8489309124Sdim}
8490309124Sdim
8491341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8492309124Sdim_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
8493309124Sdim{
8494309124Sdim  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8495309124Sdim                  (__v8di)
8496309124Sdim                  _mm512_setzero_si512 (),
8497309124Sdim                  (__mmask8) __U);
8498309124Sdim}
8499309124Sdim
8500341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
8501309124Sdim_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8502309124Sdim{
8503309124Sdim  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8504309124Sdim                 (__v16sf) __W,
8505309124Sdim                 (__mmask16) __U);
8506309124Sdim}
8507309124Sdim
8508341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
8509309124Sdim_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
8510309124Sdim{
8511309124Sdim  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8512309124Sdim                 (__v16sf)
8513309124Sdim                 _mm512_setzero_ps (),
8514309124Sdim                 (__mmask16) __U);
8515309124Sdim}
8516309124Sdim
8517341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8518309124Sdim_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8519309124Sdim{
8520309124Sdim  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8521309124Sdim                  (__v16si) __W,
8522309124Sdim                  (__mmask16) __U);
8523309124Sdim}
8524309124Sdim
8525341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8526309124Sdim_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
8527309124Sdim{
8528309124Sdim  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8529309124Sdim                  (__v16si)
8530309124Sdim                  _mm512_setzero_si512 (),
8531309124Sdim                  (__mmask16) __U);
8532309124Sdim}
8533309124Sdim
/* Passes X, Y, the predicate P, an all-ones mask and the rounding immediate R
   to __builtin_ia32_cmpss_mask.  The expansion is fully parenthesized so it
   is a single primary expression at the use site. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))
8538309124Sdim
/* Passes X, Y, the predicate P, the caller's mask M and the rounding
   immediate R to __builtin_ia32_cmpss_mask.  The expansion is fully
   parenthesized so it is a single primary expression at the use site. */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))
8543309124Sdim
/* Passes X, Y, the predicate P, an all-ones mask and
   _MM_FROUND_CUR_DIRECTION to __builtin_ia32_cmpss_mask.  The expansion is
   fully parenthesized so it is a single primary expression at the use site. */
#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))
8549309124Sdim
/* Passes X, Y, the predicate P, the caller's mask M and
   _MM_FROUND_CUR_DIRECTION to __builtin_ia32_cmpss_mask.  The expansion is
   fully parenthesized so it is a single primary expression at the use site. */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))
8555309124Sdim
/* Passes X, Y, the predicate P, an all-ones mask and the rounding immediate R
   to __builtin_ia32_cmpsd_mask.  The expansion is fully parenthesized so it
   is a single primary expression at the use site. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))
8560309124Sdim
/* Passes X, Y, the predicate P, the caller's mask M and the rounding
   immediate R to __builtin_ia32_cmpsd_mask.  The expansion is fully
   parenthesized so it is a single primary expression at the use site. */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))
8565309124Sdim
/* Passes X, Y, the predicate P, an all-ones mask and
   _MM_FROUND_CUR_DIRECTION to __builtin_ia32_cmpsd_mask.  The expansion is
   fully parenthesized so it is a single primary expression at the use site. */
#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))
8571309124Sdim
/* Passes X, Y, the predicate P, the caller's mask M and
   _MM_FROUND_CUR_DIRECTION to __builtin_ia32_cmpsd_mask.  The expansion is
   fully parenthesized so it is a single primary expression at the use site. */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))
8577309124Sdim
8578327952Sdim/* Bit Test */
8579327952Sdim
8580341825Sdimstatic __inline __mmask16 __DEFAULT_FN_ATTRS512
8581327952Sdim_mm512_test_epi32_mask (__m512i __A, __m512i __B)
8582327952Sdim{
8583327952Sdim  return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
8584341825Sdim                                   _mm512_setzero_si512());
8585327952Sdim}
8586327952Sdim
8587341825Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8588327952Sdim_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8589327952Sdim{
8590327952Sdim  return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8591341825Sdim                                        _mm512_setzero_si512());
8592327952Sdim}
8593327952Sdim
8594341825Sdimstatic __inline __mmask8 __DEFAULT_FN_ATTRS512
8595327952Sdim_mm512_test_epi64_mask (__m512i __A, __m512i __B)
8596327952Sdim{
8597327952Sdim  return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8598341825Sdim                                   _mm512_setzero_si512());
8599327952Sdim}
8600327952Sdim
8601341825Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8602327952Sdim_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8603327952Sdim{
8604327952Sdim  return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8605341825Sdim                                        _mm512_setzero_si512());
8606327952Sdim}
8607327952Sdim
8608341825Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8609327952Sdim_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8610327952Sdim{
8611327952Sdim  return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8612341825Sdim                                  _mm512_setzero_si512());
8613327952Sdim}
8614327952Sdim
8615341825Sdimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8616327952Sdim_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8617327952Sdim{
8618327952Sdim  return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8619341825Sdim                                       _mm512_setzero_si512());
8620327952Sdim}
8621327952Sdim
8622341825Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8623327952Sdim_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8624327952Sdim{
8625327952Sdim  return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8626341825Sdim                                  _mm512_setzero_si512());
8627327952Sdim}
8628327952Sdim
8629341825Sdimstatic __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8630327952Sdim_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8631327952Sdim{
8632327952Sdim  return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8633341825Sdim                                       _mm512_setzero_si512());
8634327952Sdim}
8635327952Sdim
8636341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
8637309124Sdim_mm512_movehdup_ps (__m512 __A)
8638309124Sdim{
8639309124Sdim  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8640309124Sdim                         1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8641309124Sdim}
8642309124Sdim
8643341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
8644309124Sdim_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8645309124Sdim{
8646309124Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8647309124Sdim                                             (__v16sf)_mm512_movehdup_ps(__A),
8648309124Sdim                                             (__v16sf)__W);
8649309124Sdim}
8650309124Sdim
8651341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
8652309124Sdim_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
8653309124Sdim{
8654309124Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8655309124Sdim                                             (__v16sf)_mm512_movehdup_ps(__A),
8656309124Sdim                                             (__v16sf)_mm512_setzero_ps());
8657309124Sdim}
8658309124Sdim
8659341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
8660309124Sdim_mm512_moveldup_ps (__m512 __A)
8661309124Sdim{
8662309124Sdim  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8663309124Sdim                         0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8664309124Sdim}
8665309124Sdim
8666341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
8667309124Sdim_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8668309124Sdim{
8669309124Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8670309124Sdim                                             (__v16sf)_mm512_moveldup_ps(__A),
8671309124Sdim                                             (__v16sf)__W);
8672309124Sdim}
8673309124Sdim
8674341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
8675309124Sdim_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
8676309124Sdim{
8677309124Sdim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8678309124Sdim                                             (__v16sf)_mm512_moveldup_ps(__A),
8679309124Sdim                                             (__v16sf)_mm512_setzero_ps());
8680309124Sdim}
8681309124Sdim
8682341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
8683314564Sdim_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8684314564Sdim{
8685341825Sdim  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
8686314564Sdim}
8687314564Sdim
8688341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
8689314564Sdim_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
8690314564Sdim{
8691341825Sdim  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
8692341825Sdim                                     _mm_setzero_ps());
8693314564Sdim}
8694314564Sdim
8695341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
8696314564Sdim_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8697314564Sdim{
8698341825Sdim  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
8699314564Sdim}
8700314564Sdim
8701341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
8702314564Sdim_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
8703314564Sdim{
8704341825Sdim  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
8705341825Sdim                                     _mm_setzero_pd());
8706314564Sdim}
8707314564Sdim
8708341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS128
8709314564Sdim_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
8710314564Sdim{
8711341825Sdim  __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8712314564Sdim}
8713314564Sdim
8714341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS128
8715314564Sdim_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
8716314564Sdim{
8717341825Sdim  __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8718314564Sdim}
8719314564Sdim
8720341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
8721314564Sdim_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
8722314564Sdim{
8723314564Sdim  __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8724341825Sdim                                                (__v4sf)_mm_setzero_ps(),
8725314564Sdim                                                0, 4, 4, 4);
8726314564Sdim
8727360784Sdim  return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1);
8728314564Sdim}
8729314564Sdim
8730341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
8731314564Sdim_mm_maskz_load_ss (__mmask8 __U, const float* __A)
8732314564Sdim{
8733360784Sdim  return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A,
8734341825Sdim                                                (__v4sf) _mm_setzero_ps(),
8735341825Sdim                                                __U & 1);
8736314564Sdim}
8737314564Sdim
8738341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
8739314564Sdim_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
8740314564Sdim{
8741314564Sdim  __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8742341825Sdim                                                 (__v2df)_mm_setzero_pd(),
8743341825Sdim                                                 0, 2);
8744314564Sdim
8745360784Sdim  return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1);
8746314564Sdim}
8747314564Sdim
8748341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
8749314564Sdim_mm_maskz_load_sd (__mmask8 __U, const double* __A)
8750314564Sdim{
8751360784Sdim  return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A,
8752341825Sdim                                                  (__v2df) _mm_setzero_pd(),
8753341825Sdim                                                  __U & 1);
8754314564Sdim}
8755314564Sdim
/* Passes A (as 16 x 32-bit integers) and the immediate I to
   __builtin_ia32_pshufd512.  Outer parentheses keep the result cast attached
   to the call when the macro is used inside a larger expression. */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))
8758309124Sdim
/* Feeds mask U, the result of _mm512_shuffle_epi32(A, I) and W to
   __builtin_ia32_selectd_512.  The expansion is wrapped in parentheses so it
   behaves as a single primary expression at the use site. */
#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)(__m512i)(W)))
8763309124Sdim
/* Feeds mask U, the result of _mm512_shuffle_epi32(A, I) and an all-zero
   vector to __builtin_ia32_selectd_512.  The expansion is wrapped in
   parentheses so it behaves as a single primary expression at the use site. */
#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)_mm512_setzero_si512()))
8768309124Sdim
8769341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
8770309124Sdim_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
8771309124Sdim{
8772309124Sdim  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8773309124Sdim                (__v8df) __W,
8774309124Sdim                (__mmask8) __U);
8775309124Sdim}
8776309124Sdim
8777341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
8778309124Sdim_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
8779309124Sdim{
8780309124Sdim  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8781309124Sdim                (__v8df) _mm512_setzero_pd (),
8782309124Sdim                (__mmask8) __U);
8783309124Sdim}
8784309124Sdim
8785341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8786309124Sdim_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8787309124Sdim{
8788309124Sdim  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8789309124Sdim                (__v8di) __W,
8790309124Sdim                (__mmask8) __U);
8791309124Sdim}
8792309124Sdim
8793341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8794309124Sdim_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
8795309124Sdim{
8796309124Sdim  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8797341825Sdim                (__v8di) _mm512_setzero_si512 (),
8798309124Sdim                (__mmask8) __U);
8799309124Sdim}
8800309124Sdim
8801341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
8802309124Sdim_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
8803309124Sdim{
8804309124Sdim  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8805309124Sdim              (__v8df) __W,
8806309124Sdim              (__mmask8) __U);
8807309124Sdim}
8808309124Sdim
8809341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
8810309124Sdim_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
8811309124Sdim{
8812309124Sdim  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8813309124Sdim              (__v8df) _mm512_setzero_pd(),
8814309124Sdim              (__mmask8) __U);
8815309124Sdim}
8816309124Sdim
8817341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8818309124Sdim_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
8819309124Sdim{
8820309124Sdim  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8821309124Sdim              (__v8di) __W,
8822309124Sdim              (__mmask8) __U);
8823309124Sdim}
8824309124Sdim
8825341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8826309124Sdim_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
8827309124Sdim{
8828309124Sdim  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8829341825Sdim              (__v8di) _mm512_setzero_si512(),
8830309124Sdim              (__mmask8) __U);
8831309124Sdim}
8832309124Sdim
8833341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
8834309124Sdim_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
8835309124Sdim{
8836309124Sdim  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8837309124Sdim                   (__v16sf) __W,
8838309124Sdim                   (__mmask16) __U);
8839309124Sdim}
8840309124Sdim
8841341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
8842309124Sdim_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
8843309124Sdim{
8844309124Sdim  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8845309124Sdim                   (__v16sf) _mm512_setzero_ps(),
8846309124Sdim                   (__mmask16) __U);
8847309124Sdim}
8848309124Sdim
8849341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8850309124Sdim_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
8851309124Sdim{
8852309124Sdim  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8853309124Sdim              (__v16si) __W,
8854309124Sdim              (__mmask16) __U);
8855309124Sdim}
8856309124Sdim
8857341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8858309124Sdim_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
8859309124Sdim{
8860309124Sdim  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8861341825Sdim              (__v16si) _mm512_setzero_si512(),
8862309124Sdim              (__mmask16) __U);
8863309124Sdim}
8864309124Sdim
8865341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
8866309124Sdim_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
8867309124Sdim{
8868309124Sdim  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8869309124Sdim               (__v16sf) __W,
8870309124Sdim               (__mmask16) __U);
8871309124Sdim}
8872309124Sdim
8873341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
8874309124Sdim_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
8875309124Sdim{
8876309124Sdim  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8877309124Sdim               (__v16sf) _mm512_setzero_ps(),
8878309124Sdim               (__mmask16) __U);
8879309124Sdim}
8880309124Sdim
8881341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8882309124Sdim_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8883309124Sdim{
8884309124Sdim  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8885309124Sdim                (__v16si) __W,
8886309124Sdim                (__mmask16) __U);
8887309124Sdim}
8888309124Sdim
8889341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
8890309124Sdim_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
8891309124Sdim{
8892309124Sdim  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8893341825Sdim                (__v16si) _mm512_setzero_si512(),
8894309124Sdim                (__mmask16) __U);
8895309124Sdim}
8896309124Sdim
/* Rounding-controlled float -> double conversions built on
 * __builtin_ia32_cvtps2pd512_mask.  A is cast to __m256, R is passed as an
 * int rounding argument.  The three forms differ only in the second operand
 * (undefined vector, W, or all-zero) and the mask (all ones or U).
 *
 * Each expansion is wrapped in parentheses so that a postfix operator applied
 * to the macro result (e.g. a subscript) binds to the casted value rather than
 * to the builtin call. */
#define _mm512_cvt_roundps_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
8911309124Sdim
8912341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
8913309124Sdim_mm512_cvtps_pd (__m256 __A)
8914309124Sdim{
8915341825Sdim  return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8916309124Sdim}
8917309124Sdim
8918341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
8919309124Sdim_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
8920309124Sdim{
8921341825Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8922341825Sdim                                              (__v8df)_mm512_cvtps_pd(__A),
8923341825Sdim                                              (__v8df)__W);
8924309124Sdim}
8925309124Sdim
8926341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
8927309124Sdim_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
8928309124Sdim{
8929341825Sdim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8930341825Sdim                                              (__v8df)_mm512_cvtps_pd(__A),
8931341825Sdim                                              (__v8df)_mm512_setzero_pd());
8932309124Sdim}
8933309124Sdim
8934341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
8935314564Sdim_mm512_cvtpslo_pd (__m512 __A)
8936314564Sdim{
8937341825Sdim  return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
8938314564Sdim}
8939314564Sdim
8940341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
8941314564Sdim_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
8942314564Sdim{
8943341825Sdim  return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
8944314564Sdim}
8945314564Sdim
8946341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
8947309124Sdim_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
8948309124Sdim{
8949309124Sdim  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8950309124Sdim              (__v8df) __A,
8951309124Sdim              (__v8df) __W);
8952309124Sdim}
8953309124Sdim
8954341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
8955309124Sdim_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
8956309124Sdim{
8957309124Sdim  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8958309124Sdim              (__v8df) __A,
8959309124Sdim              (__v8df) _mm512_setzero_pd ());
8960309124Sdim}
8961309124Sdim
8962341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
8963309124Sdim_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
8964309124Sdim{
8965309124Sdim  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8966309124Sdim             (__v16sf) __A,
8967309124Sdim             (__v16sf) __W);
8968309124Sdim}
8969309124Sdim
8970341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
8971309124Sdim_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
8972309124Sdim{
8973309124Sdim  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8974309124Sdim             (__v16sf) __A,
8975309124Sdim             (__v16sf) _mm512_setzero_ps ());
8976309124Sdim}
8977309124Sdim
8978341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
8979309124Sdim_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
8980309124Sdim{
8981309124Sdim  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8982309124Sdim            (__mmask8) __U);
8983309124Sdim}
8984309124Sdim
8985341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
8986309124Sdim_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
8987309124Sdim{
8988309124Sdim  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
8989309124Sdim            (__mmask8) __U);
8990309124Sdim}
8991309124Sdim
8992341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
8993309124Sdim_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
8994309124Sdim{
8995309124Sdim  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
8996309124Sdim            (__mmask16) __U);
8997309124Sdim}
8998309124Sdim
8999341825Sdimstatic __inline__ void __DEFAULT_FN_ATTRS512
9000309124Sdim_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9001309124Sdim{
9002309124Sdim  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9003309124Sdim            (__mmask16) __U);
9004309124Sdim}
9005309124Sdim
/* Rounding-controlled scalar double -> float conversions built on
 * __builtin_ia32_cvtsd2ss_round_mask.  A is cast to __m128, B to __m128d and R
 * to int.  The three forms differ only in the third operand (undefined
 * vector, W, or all-zero) and the mask (all ones or U).
 *
 * Each expansion is fully parenthesized so that postfix operators applied to
 * the macro result bind to the casted value, not to the builtin call. */
#define _mm_cvt_roundsd_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_undefined_ps(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)))
9023309124Sdim
9024341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
9025309124Sdim_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
9026309124Sdim{
9027341825Sdim  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9028341825Sdim                                             (__v2df)__B,
9029341825Sdim                                             (__v4sf)__W,
9030341825Sdim                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
9031309124Sdim}
9032309124Sdim
9033341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
9034309124Sdim_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
9035309124Sdim{
9036341825Sdim  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9037341825Sdim                                             (__v2df)__B,
9038322320Sdim                                             (__v4sf)_mm_setzero_ps(),
9039341825Sdim                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
9040309124Sdim}
9041309124Sdim
/* Alternate spellings: each *_i32 / *_i64 name is a plain alias for the
 * corresponding *_si32 / *_si64 intrinsic. */

/* 32-bit integer aliases. */
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss

/* 64-bit integer aliases, defined only when __x86_64__ is defined. */
#ifdef __x86_64__
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif
9052309124Sdim
#ifdef __x86_64__
/* Both spellings expand to __builtin_ia32_cvtsi2sd64 with A cast to __m128d,
 * B to long long and R to int.  The expansion is fully parenthesized so that
 * postfix operators on the macro result bind to the casted value. */
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))
#endif
9062309124Sdim
/* Both spellings expand to __builtin_ia32_cvtsi2ss32 with A cast to __m128,
 * B to int and R to int.  The expansion is fully parenthesized so that
 * postfix operators on the macro result bind to the casted value. */
#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))
9068309124Sdim
#ifdef __x86_64__
/* Both spellings expand to __builtin_ia32_cvtsi2ss64 with A cast to __m128,
 * B to long long and R to int.  The expansion is fully parenthesized so that
 * postfix operators on the macro result bind to the casted value. */
#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))

#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))
#endif
9078309124Sdim
/* Rounding-controlled scalar float -> double conversions built on
 * __builtin_ia32_cvtss2sd_round_mask.  A is cast to __m128d, B to __m128 and R
 * to int.  The three forms differ only in the third operand (undefined
 * vector, W, or all-zero) and the mask (all ones or U).
 *
 * Each expansion is fully parenthesized so that postfix operators applied to
 * the macro result bind to the casted value, not to the builtin call. */
#define _mm_cvt_roundss_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_undefined_pd(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))
9096309124Sdim
9097341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
9098309124Sdim_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
9099309124Sdim{
9100341825Sdim  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9101341825Sdim                                            (__v4sf)__B,
9102341825Sdim                                            (__v2df)__W,
9103341825Sdim                                            (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
9104309124Sdim}
9105309124Sdim
9106341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
9107309124Sdim_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
9108309124Sdim{
9109341825Sdim  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9110341825Sdim                                            (__v4sf)__B,
9111341825Sdim                                            (__v2df)_mm_setzero_pd(),
9112341825Sdim                                            (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
9113309124Sdim}
9114309124Sdim
9115341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
9116309124Sdim_mm_cvtu32_sd (__m128d __A, unsigned __B)
9117309124Sdim{
9118341825Sdim  __A[0] = __B;
9119341825Sdim  return __A;
9120309124Sdim}
9121309124Sdim
9122314564Sdim#ifdef __x86_64__
9123341825Sdim#define _mm_cvt_roundu64_sd(A, B, R) \
9124309124Sdim  (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
9125341825Sdim                                      (unsigned long long)(B), (int)(R))
9126309124Sdim
9127341825Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
9128309124Sdim_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
9129309124Sdim{
9130341825Sdim  __A[0] = __B;
9131341825Sdim  return __A;
9132309124Sdim}
9133314564Sdim#endif
9134309124Sdim
/* Expands to __builtin_ia32_cvtusi2ss32 with A cast to __m128, B to unsigned
 * int and R to int.  The expansion is fully parenthesized so that postfix
 * operators on the macro result bind to the casted value. */
#define _mm_cvt_roundu32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                      (int)(R)))
9138309124Sdim
9139341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
9140309124Sdim_mm_cvtu32_ss (__m128 __A, unsigned __B)
9141309124Sdim{
9142341825Sdim  __A[0] = __B;
9143341825Sdim  return __A;
9144309124Sdim}
9145309124Sdim
9146314564Sdim#ifdef __x86_64__
9147341825Sdim#define _mm_cvt_roundu64_ss(A, B, R) \
9148309124Sdim  (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
9149341825Sdim                                     (unsigned long long)(B), (int)(R))
9150309124Sdim
9151341825Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
9152309124Sdim_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
9153309124Sdim{
9154341825Sdim  __A[0] = __B;
9155341825Sdim  return __A;
9156309124Sdim}
9157314564Sdim#endif
9158309124Sdim
9159341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
9160309124Sdim_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
9161309124Sdim{
9162327952Sdim  return (__m512i) __builtin_ia32_selectd_512(__M,
9163327952Sdim                                              (__v16si) _mm512_set1_epi32(__A),
9164327952Sdim                                              (__v16si) __O);
9165309124Sdim}
9166309124Sdim
9167341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
9168309124Sdim_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
9169309124Sdim{
9170327952Sdim  return (__m512i) __builtin_ia32_selectq_512(__M,
9171327952Sdim                                              (__v8di) _mm512_set1_epi64(__A),
9172327952Sdim                                              (__v8di) __O);
9173309124Sdim}
9174309124Sdim
9175341825Sdimstatic  __inline __m512i __DEFAULT_FN_ATTRS512
9176321369Sdim_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
9177321369Sdim    char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
9178321369Sdim    char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
9179321369Sdim    char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
9180321369Sdim    char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
9181321369Sdim    char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
9182321369Sdim    char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
9183321369Sdim    char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
9184321369Sdim    char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
9185321369Sdim    char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
9186321369Sdim    char __e4, char __e3, char __e2, char __e1, char __e0) {
9187321369Sdim
9188321369Sdim  return __extension__ (__m512i)(__v64qi)
9189321369Sdim    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9190321369Sdim     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9191321369Sdim     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9192321369Sdim     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9193321369Sdim     __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9194321369Sdim     __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9195321369Sdim     __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9196321369Sdim     __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9197321369Sdim}
9198321369Sdim
9199341825Sdimstatic  __inline __m512i __DEFAULT_FN_ATTRS512
9200321369Sdim_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
9201321369Sdim    short __e27, short __e26, short __e25, short __e24, short __e23,
9202321369Sdim    short __e22, short __e21, short __e20, short __e19, short __e18,
9203321369Sdim    short __e17, short __e16, short __e15, short __e14, short __e13,
9204321369Sdim    short __e12, short __e11, short __e10, short __e9, short __e8,
9205321369Sdim    short __e7, short __e6, short __e5, short __e4, short __e3,
9206321369Sdim    short __e2, short __e1, short __e0) {
9207321369Sdim  return __extension__ (__m512i)(__v32hi)
9208321369Sdim    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9209321369Sdim     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9210321369Sdim     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9211321369Sdim     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9212321369Sdim}
9213321369Sdim
9214341825Sdimstatic __inline __m512i __DEFAULT_FN_ATTRS512
9215309124Sdim_mm512_set_epi32 (int __A, int __B, int __C, int __D,
9216309124Sdim     int __E, int __F, int __G, int __H,
9217309124Sdim     int __I, int __J, int __K, int __L,
9218309124Sdim     int __M, int __N, int __O, int __P)
9219309124Sdim{
9220309124Sdim  return __extension__ (__m512i)(__v16si)
9221309124Sdim  { __P, __O, __N, __M, __L, __K, __J, __I,
9222309124Sdim    __H, __G, __F, __E, __D, __C, __B, __A };
9223309124Sdim}
9224309124Sdim
/* Reversed-argument form of _mm512_set_epi32: e0 is passed last, so it ends up
 * in element 0 of the result. */
#define _mm512_setr_epi32(e0, e1, e2, e3, e4, e5, e6, e7, \
                          e8, e9, e10, e11, e12, e13, e14, e15) \
  _mm512_set_epi32((e15), (e14), (e13), (e12), (e11), (e10), (e9), (e8), \
                   (e7), (e6), (e5), (e4), (e3), (e2), (e1), (e0))
9229309124Sdim
9230341825Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
9231309124Sdim_mm512_set_epi64 (long long __A, long long __B, long long __C,
9232309124Sdim     long long __D, long long __E, long long __F,
9233309124Sdim     long long __G, long long __H)
9234309124Sdim{
9235309124Sdim  return __extension__ (__m512i) (__v8di)
9236309124Sdim  { __H, __G, __F, __E, __D, __C, __B, __A };
9237309124Sdim}
9238309124Sdim
/* Reversed-argument form of _mm512_set_epi64: e0 is passed last, so it ends up
 * in element 0 of the result. */
#define _mm512_setr_epi64(e0, e1, e2, e3, e4, e5, e6, e7) \
  _mm512_set_epi64((e7), (e6), (e5), (e4), (e3), (e2), (e1), (e0))
9241309124Sdim
9242341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
9243309124Sdim_mm512_set_pd (double __A, double __B, double __C, double __D,
9244309124Sdim        double __E, double __F, double __G, double __H)
9245309124Sdim{
9246309124Sdim  return __extension__ (__m512d)
9247309124Sdim  { __H, __G, __F, __E, __D, __C, __B, __A };
9248309124Sdim}
9249309124Sdim
/* Reversed-argument form of _mm512_set_pd: e0 is passed last, so it ends up in
 * element 0 of the result. */
#define _mm512_setr_pd(e0, e1, e2, e3, e4, e5, e6, e7) \
  _mm512_set_pd((e7), (e6), (e5), (e4), (e3), (e2), (e1), (e0))
9252309124Sdim
9253341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
9254309124Sdim_mm512_set_ps (float __A, float __B, float __C, float __D,
9255309124Sdim        float __E, float __F, float __G, float __H,
9256309124Sdim        float __I, float __J, float __K, float __L,
9257309124Sdim        float __M, float __N, float __O, float __P)
9258309124Sdim{
9259309124Sdim  return __extension__ (__m512)
9260309124Sdim  { __P, __O, __N, __M, __L, __K, __J, __I,
9261309124Sdim    __H, __G, __F, __E, __D, __C, __B, __A };
9262309124Sdim}
9263309124Sdim
/* Reversed-argument form of _mm512_set_ps: e0 is passed last, so it ends up in
 * element 0 of the result. */
#define _mm512_setr_ps(e0, e1, e2, e3, e4, e5, e6, e7, \
                       e8, e9, e10, e11, e12, e13, e14, e15) \
  _mm512_set_ps((e15), (e14), (e13), (e12), (e11), (e10), (e9), (e8), \
                (e7), (e6), (e5), (e4), (e3), (e2), (e1), (e0))
9267309124Sdim
9268341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
9269314564Sdim_mm512_abs_ps(__m512 __A)
9270309124Sdim{
9271314564Sdim  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9272309124Sdim}
9273309124Sdim
9274341825Sdimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
9275314564Sdim_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
9276309124Sdim{
9277314564Sdim  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9278309124Sdim}
9279309124Sdim
9280341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
9281314564Sdim_mm512_abs_pd(__m512d __A)
9282309124Sdim{
9283314564Sdim  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
9284309124Sdim}
9285309124Sdim
9286341825Sdimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
9287314564Sdim_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
9288309124Sdim{
9289314564Sdim  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
9290309124Sdim}
9291309124Sdim
/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
 * outputs. This class of vector operation forms the basis of many scientific
 * computations. In vector-reduction arithmetic, the evaluation of the
 * reduction is independent of the order of the input elements of the vector.
 *
 * The bisection method is used. At each step, the vector produced by the
 * previous step is split in half and the operation is applied to its two
 * halves. This takes log2(n) steps, where n is the number of elements in the
 * vector.
 */
9301314564Sdim
9302341825Sdim#define _mm512_mask_reduce_operator(op) \
9303341825Sdim  __v4du __t1 = (__v4du)_mm512_extracti64x4_epi64(__W, 0); \
9304341825Sdim  __v4du __t2 = (__v4du)_mm512_extracti64x4_epi64(__W, 1); \
9305341825Sdim  __m256i __t3 = (__m256i)(__t1 op __t2); \
9306341825Sdim  __v2du __t4 = (__v2du)_mm256_extracti128_si256(__t3, 0); \
9307341825Sdim  __v2du __t5 = (__v2du)_mm256_extracti128_si256(__t3, 1); \
9308341825Sdim  __v2du __t6 = __t4 op __t5; \
9309341825Sdim  __v2du __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
9310341825Sdim  __v2du __t8 = __t6 op __t7; \
9311353358Sdim  return __t8[0]
9312314564Sdim
9313341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
9314341825Sdim  _mm512_mask_reduce_operator(+);
9315314564Sdim}
9316314564Sdim
9317341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) {
9318341825Sdim  _mm512_mask_reduce_operator(*);
9319314564Sdim}
9320314564Sdim
9321341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) {
9322341825Sdim  _mm512_mask_reduce_operator(&);
9323314564Sdim}
9324314564Sdim
9325341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) {
9326341825Sdim  _mm512_mask_reduce_operator(|);
9327314564Sdim}
9328314564Sdim
9329341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512
9330314564Sdim_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
9331341825Sdim  __W = _mm512_maskz_mov_epi64(__M, __W);
9332341825Sdim  _mm512_mask_reduce_operator(+);
9333314564Sdim}
9334314564Sdim
9335341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512
9336314564Sdim_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
9337341825Sdim  __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
9338341825Sdim  _mm512_mask_reduce_operator(*);
9339314564Sdim}
9340314564Sdim
9341341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512
9342314564Sdim_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
9343341825Sdim  __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __W);
9344341825Sdim  _mm512_mask_reduce_operator(&);
9345314564Sdim}
9346314564Sdim
9347341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512
9348314564Sdim_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
9349341825Sdim  __W = _mm512_maskz_mov_epi64(__M, __W);
9350341825Sdim  _mm512_mask_reduce_operator(|);
9351314564Sdim}
9352341825Sdim#undef _mm512_mask_reduce_operator
9353314564Sdim
9354341825Sdim#define _mm512_mask_reduce_operator(op) \
9355341825Sdim  __m256d __t1 = _mm512_extractf64x4_pd(__W, 0); \
9356341825Sdim  __m256d __t2 = _mm512_extractf64x4_pd(__W, 1); \
9357341825Sdim  __m256d __t3 = __t1 op __t2; \
9358341825Sdim  __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
9359341825Sdim  __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
9360341825Sdim  __m128d __t6 = __t4 op __t5; \
9361341825Sdim  __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
9362341825Sdim  __m128d __t8 = __t6 op __t7; \
9363353358Sdim  return __t8[0]
9364341825Sdim
9365341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
9366341825Sdim  _mm512_mask_reduce_operator(+);
9367341825Sdim}
9368341825Sdim
9369341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
9370341825Sdim  _mm512_mask_reduce_operator(*);
9371341825Sdim}
9372341825Sdim
9373341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512
9374314564Sdim_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
9375341825Sdim  __W = _mm512_maskz_mov_pd(__M, __W);
9376341825Sdim  _mm512_mask_reduce_operator(+);
9377314564Sdim}
9378314564Sdim
9379341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512
9380314564Sdim_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
9381341825Sdim  __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
9382341825Sdim  _mm512_mask_reduce_operator(*);
9383314564Sdim}
9384341825Sdim#undef _mm512_mask_reduce_operator
9385314564Sdim
9386341825Sdim#define _mm512_mask_reduce_operator(op) \
9387341825Sdim  __v8su __t1 = (__v8su)_mm512_extracti64x4_epi64(__W, 0); \
9388341825Sdim  __v8su __t2 = (__v8su)_mm512_extracti64x4_epi64(__W, 1); \
9389341825Sdim  __m256i __t3 = (__m256i)(__t1 op __t2); \
9390341825Sdim  __v4su __t4 = (__v4su)_mm256_extracti128_si256(__t3, 0); \
9391341825Sdim  __v4su __t5 = (__v4su)_mm256_extracti128_si256(__t3, 1); \
9392341825Sdim  __v4su __t6 = __t4 op __t5; \
9393341825Sdim  __v4su __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
9394341825Sdim  __v4su __t8 = __t6 op __t7; \
9395341825Sdim  __v4su __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
9396341825Sdim  __v4su __t10 = __t8 op __t9; \
9397353358Sdim  return __t10[0]
9398314564Sdim
9399341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512
9400314564Sdim_mm512_reduce_add_epi32(__m512i __W) {
9401341825Sdim  _mm512_mask_reduce_operator(+);
9402314564Sdim}
9403314564Sdim
9404341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512
9405314564Sdim_mm512_reduce_mul_epi32(__m512i __W) {
9406341825Sdim  _mm512_mask_reduce_operator(*);
9407314564Sdim}
9408314564Sdim
9409341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512
9410314564Sdim_mm512_reduce_and_epi32(__m512i __W) {
9411341825Sdim  _mm512_mask_reduce_operator(&);
9412314564Sdim}
9413314564Sdim
9414341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512
9415314564Sdim_mm512_reduce_or_epi32(__m512i __W) {
9416341825Sdim  _mm512_mask_reduce_operator(|);
9417314564Sdim}
9418314564Sdim
9419341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512
9420314564Sdim_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
9421341825Sdim  __W = _mm512_maskz_mov_epi32(__M, __W);
9422341825Sdim  _mm512_mask_reduce_operator(+);
9423314564Sdim}
9424314564Sdim
9425341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512
9426314564Sdim_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
9427341825Sdim  __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
9428341825Sdim  _mm512_mask_reduce_operator(*);
9429314564Sdim}
9430314564Sdim
9431341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512
9432314564Sdim_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
9433341825Sdim  __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __W);
9434341825Sdim  _mm512_mask_reduce_operator(&);
9435314564Sdim}
9436314564Sdim
9437341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512
9438314564Sdim_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
9439341825Sdim  __W = _mm512_maskz_mov_epi32(__M, __W);
9440341825Sdim  _mm512_mask_reduce_operator(|);
9441314564Sdim}
9442341825Sdim#undef _mm512_mask_reduce_operator
9443314564Sdim
9444341825Sdim#define _mm512_mask_reduce_operator(op) \
9445341825Sdim  __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 0); \
9446341825Sdim  __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 1); \
9447341825Sdim  __m256 __t3 = __t1 op __t2; \
9448341825Sdim  __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \
9449341825Sdim  __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \
9450341825Sdim  __m128 __t6 = __t4 op __t5; \
9451341825Sdim  __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
9452341825Sdim  __m128 __t8 = __t6 op __t7; \
9453341825Sdim  __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
9454341825Sdim  __m128 __t10 = __t8 op __t9; \
9455353358Sdim  return __t10[0]
9456341825Sdim
9457341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512
9458341825Sdim_mm512_reduce_add_ps(__m512 __W) {
9459341825Sdim  _mm512_mask_reduce_operator(+);
9460341825Sdim}
9461341825Sdim
9462341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512
9463341825Sdim_mm512_reduce_mul_ps(__m512 __W) {
9464341825Sdim  _mm512_mask_reduce_operator(*);
9465341825Sdim}
9466341825Sdim
9467341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512
9468314564Sdim_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
9469341825Sdim  __W = _mm512_maskz_mov_ps(__M, __W);
9470341825Sdim  _mm512_mask_reduce_operator(+);
9471314564Sdim}
9472314564Sdim
9473341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512
9474314564Sdim_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
9475341825Sdim  __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
9476341825Sdim  _mm512_mask_reduce_operator(*);
9477314564Sdim}
9478341825Sdim#undef _mm512_mask_reduce_operator
9479314564Sdim
9480341825Sdim#define _mm512_mask_reduce_operator(op) \
9481341825Sdim  __m512i __t1 = (__m512i)__builtin_shufflevector((__v8di)__V, (__v8di)__V, 4, 5, 6, 7, 0, 1, 2, 3); \
9482341825Sdim  __m512i __t2 = _mm512_##op(__V, __t1); \
9483341825Sdim  __m512i __t3 = (__m512i)__builtin_shufflevector((__v8di)__t2, (__v8di)__t2, 2, 3, 0, 1, 6, 7, 4, 5); \
9484341825Sdim  __m512i __t4 = _mm512_##op(__t2, __t3); \
9485341825Sdim  __m512i __t5 = (__m512i)__builtin_shufflevector((__v8di)__t4, (__v8di)__t4, 1, 0, 3, 2, 5, 4, 7, 6); \
9486341825Sdim  __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \
9487353358Sdim  return __t6[0]
9488314564Sdim
9489341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512
9490314564Sdim_mm512_reduce_max_epi64(__m512i __V) {
9491341825Sdim  _mm512_mask_reduce_operator(max_epi64);
9492314564Sdim}
9493314564Sdim
9494341825Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9495314564Sdim_mm512_reduce_max_epu64(__m512i __V) {
9496341825Sdim  _mm512_mask_reduce_operator(max_epu64);
9497314564Sdim}
9498314564Sdim
9499341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512
9500341825Sdim_mm512_reduce_min_epi64(__m512i __V) {
9501341825Sdim  _mm512_mask_reduce_operator(min_epi64);
9502314564Sdim}
9503314564Sdim
9504341825Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9505314564Sdim_mm512_reduce_min_epu64(__m512i __V) {
9506341825Sdim  _mm512_mask_reduce_operator(min_epu64);
9507314564Sdim}
9508314564Sdim
9509341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512
9510314564Sdim_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
9511341825Sdim  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
9512341825Sdim  _mm512_mask_reduce_operator(max_epi64);
9513314564Sdim}
9514314564Sdim
9515341825Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9516314564Sdim_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
9517341825Sdim  __V = _mm512_maskz_mov_epi64(__M, __V);
9518341825Sdim  _mm512_mask_reduce_operator(max_epu64);
9519314564Sdim}
9520314564Sdim
/// Masked horizontal reduction of \a __V with the min_epi64 operation.
///
/// \param __M
///    8-bit mask passed to _mm512_mask_mov_epi64 to choose between \a __V
///    and the fill vector.
/// \param __V
///    The 512-bit integer vector to reduce.
/// \returns Element 0 of the vector produced by the reduction helper, as a
///    long long.
9521341825Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS512
9522314564Sdim_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
  /* Merge __V over a vector filled with __LONG_LONG_MAX__. Per the mask_mov
     intrinsic's documented merge behaviour, lanes not selected by __M take
     that fill value, which can never be below a selected lane in a signed
     min. */
9523341825Sdim  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
  /* The helper macro reads the updated __V and supplies the return. */
9524341825Sdim  _mm512_mask_reduce_operator(min_epi64);
9525314564Sdim}
9526314564Sdim
/// Masked horizontal reduction of \a __V with the min_epu64 operation.
///
/// \param __M
///    8-bit mask passed to _mm512_mask_mov_epi64 to choose between \a __V
///    and the fill vector.
/// \param __V
///    The 512-bit integer vector to reduce.
/// \returns Element 0 of the vector produced by the reduction helper,
///    converted to unsigned long long by the function's return type.
9527341825Sdimstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9528314564Sdim_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
  /* Merge __V over a vector filled with ~0ULL (all bits set, the largest
     unsigned 64-bit value). Per the mask_mov intrinsic's documented merge
     behaviour, lanes not selected by __M take that fill value, which can
     never be below a selected lane in an unsigned min. */
9529341825Sdim  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __V);
  /* The helper macro reads the updated __V and supplies the return. */
9530341825Sdim  _mm512_mask_reduce_operator(min_epu64);
9531314564Sdim}
/* Retire the previous definition of the reduction helper so that the name
   can be redefined immediately below. */
9532341825Sdim#undef _mm512_mask_reduce_operator
9533314564Sdim
/* Reduction helper used by the *_epi32 / *_epu32 reduce functions that follow.
 *
 * Must be expanded inside a function that has a 512-bit integer variable
 * named __V in scope. `op` is pasted after `_mm256_` and `_mm_` to name the
 * pairwise operation. The expansion:
 *   1. extracts the two 256-bit parts of __V (immediates 0 and 1) and
 *      combines them with _mm256_<op>;
 *   2. extracts the two 128-bit parts of that result and combines them with
 *      _mm_<op>;
 *   3. views the 128-bit value as __v4si, swaps its element pairs
 *      (shuffle 2, 3, 0, 1) and combines again;
 *   4. swaps adjacent elements (shuffle 1, 0, 3, 2) and combines once more;
 *   5. returns element 0 of the final __v4si.
 * The expansion ends in a `return` without a trailing semicolon; the
 * invocation site supplies it.
 */
9534341825Sdim#define _mm512_mask_reduce_operator(op) \
9535341825Sdim  __m256i __t1 = _mm512_extracti64x4_epi64(__V, 0); \
9536341825Sdim  __m256i __t2 = _mm512_extracti64x4_epi64(__V, 1); \
9537341825Sdim  __m256i __t3 = _mm256_##op(__t1, __t2); \
9538341825Sdim  __m128i __t4 = _mm256_extracti128_si256(__t3, 0); \
9539341825Sdim  __m128i __t5 = _mm256_extracti128_si256(__t3, 1); \
9540341825Sdim  __m128i __t6 = _mm_##op(__t4, __t5); \
9541341825Sdim  __m128i __t7 = (__m128i)__builtin_shufflevector((__v4si)__t6, (__v4si)__t6, 2, 3, 0, 1); \
9542341825Sdim  __m128i __t8 = _mm_##op(__t6, __t7); \
9543341825Sdim  __m128i __t9 = (__m128i)__builtin_shufflevector((__v4si)__t8, (__v4si)__t8, 1, 0, 3, 2); \
9544341825Sdim  __v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \
9545353358Sdim  return __t10[0]
9546341825Sdim
/// Horizontally reduces the 512-bit integer vector \a __V with the
///    max_epi32 operation.
///
/// \param __V
///    The 512-bit integer vector to reduce.
/// \returns Element 0 of the __v4si vector produced by the reduction
///    helper, as an int.
9547341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512
9548341825Sdim_mm512_reduce_max_epi32(__m512i __V) {
  /* The helper macro reads __V by name and supplies the return statement. */
9549341825Sdim  _mm512_mask_reduce_operator(max_epi32);
9550314564Sdim}
9551314564Sdim
/// Horizontally reduces the 512-bit integer vector \a __V with the
///    max_epu32 operation.
///
/// \param __V
///    The 512-bit integer vector to reduce.
/// \returns Element 0 of the __v4si vector produced by the reduction
///    helper, converted to unsigned int by the function's return type.
9552341825Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS512
9553341825Sdim_mm512_reduce_max_epu32(__m512i __V) {
  /* The helper macro reads __V by name and supplies the return statement. */
9554341825Sdim  _mm512_mask_reduce_operator(max_epu32);
9555341825Sdim}
9556314564Sdim
/// Horizontally reduces the 512-bit integer vector \a __V with the
///    min_epi32 operation.
///
/// \param __V
///    The 512-bit integer vector to reduce.
/// \returns Element 0 of the __v4si vector produced by the reduction
///    helper, as an int.
9557341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512
9558341825Sdim_mm512_reduce_min_epi32(__m512i __V) {
  /* The helper macro reads __V by name and supplies the return statement. */
9559341825Sdim  _mm512_mask_reduce_operator(min_epi32);
9560314564Sdim}
9561314564Sdim
/// Horizontally reduces the 512-bit integer vector \a __V with the
///    min_epu32 operation.
///
/// \param __V
///    The 512-bit integer vector to reduce.
/// \returns Element 0 of the __v4si vector produced by the reduction
///    helper, converted to unsigned int by the function's return type.
9562341825Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS512
9563341825Sdim_mm512_reduce_min_epu32(__m512i __V) {
  /* The helper macro reads __V by name and supplies the return statement. */
9564341825Sdim  _mm512_mask_reduce_operator(min_epu32);
9565314564Sdim}
9566314564Sdim
/// Masked horizontal reduction of \a __V with the max_epi32 operation.
///
/// \param __M
///    16-bit mask passed to _mm512_mask_mov_epi32 to choose between \a __V
///    and the fill vector.
/// \param __V
///    The 512-bit integer vector to reduce.
/// \returns Element 0 of the __v4si vector produced by the reduction
///    helper, as an int.
9567341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512
9568341825Sdim_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
  /* Merge __V over a vector filled with -__INT_MAX__ - 1 (the most negative
     int). Per the mask_mov intrinsic's documented merge behaviour, lanes
     not selected by __M take that fill value, which can never exceed a
     selected lane in a signed max. */
9569341825Sdim  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
  /* The helper macro reads the updated __V and supplies the return. */
9570341825Sdim  _mm512_mask_reduce_operator(max_epi32);
9571314564Sdim}
9572314564Sdim
/// Masked horizontal reduction of \a __V with the max_epu32 operation.
///
/// \param __M
///    16-bit mask passed to _mm512_maskz_mov_epi32 to select lanes of
///    \a __V.
/// \param __V
///    The 512-bit integer vector to reduce.
/// \returns Element 0 of the __v4si vector produced by the reduction
///    helper, converted to unsigned int by the function's return type.
9573341825Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS512
9574341825Sdim_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
  /* Zero-masking move: per the maskz_mov intrinsic's documented behaviour,
     lanes not selected by __M become 0, the smallest unsigned value, so
     they can never exceed a selected lane in an unsigned max. */
9575341825Sdim  __V = _mm512_maskz_mov_epi32(__M, __V);
  /* The helper macro reads the updated __V and supplies the return. */
9576341825Sdim  _mm512_mask_reduce_operator(max_epu32);
9577314564Sdim}
9578314564Sdim
/// Masked horizontal reduction of \a __V with the min_epi32 operation.
///
/// \param __M
///    16-bit mask passed to _mm512_mask_mov_epi32 to choose between \a __V
///    and the fill vector.
/// \param __V
///    The 512-bit integer vector to reduce.
/// \returns Element 0 of the __v4si vector produced by the reduction
///    helper, as an int.
9579341825Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512
9580341825Sdim_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
  /* Merge __V over a vector filled with __INT_MAX__. Per the mask_mov
     intrinsic's documented merge behaviour, lanes not selected by __M take
     that fill value, which can never be below a selected lane in a signed
     min. */
9581341825Sdim  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
  /* The helper macro reads the updated __V and supplies the return. */
9582341825Sdim  _mm512_mask_reduce_operator(min_epi32);
9583314564Sdim}
9584314564Sdim
/// Masked horizontal reduction of \a __V with the min_epu32 operation.
///
/// \param __M
///    16-bit mask passed to _mm512_mask_mov_epi32 to choose between \a __V
///    and the fill vector.
/// \param __V
///    The 512-bit integer vector to reduce.
/// \returns Element 0 of the __v4si vector produced by the reduction
///    helper, converted to unsigned int by the function's return type.
9585341825Sdimstatic __inline__ unsigned int __DEFAULT_FN_ATTRS512
9586341825Sdim_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
  /* Merge __V over a vector filled with ~0U (all bits set, the largest
     unsigned int). Per the mask_mov intrinsic's documented merge behaviour,
     lanes not selected by __M take that fill value, which can never be
     below a selected lane in an unsigned min. */
9587341825Sdim  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __V);
  /* The helper macro reads the updated __V and supplies the return. */
9588341825Sdim  _mm512_mask_reduce_operator(min_epu32);
9589314564Sdim}
/* Retire the previous definition of the reduction helper so that the name
   can be redefined immediately below. */
9590341825Sdim#undef _mm512_mask_reduce_operator
9591314564Sdim
/* Reduction helper used by the *_pd reduce functions that follow.
 *
 * Must be expanded inside a function that has a __m512d variable named __V
 * in scope. `op` is pasted after `_mm256_` and `_mm_` to name the pairwise
 * operation. The expansion:
 *   1. extracts the two 256-bit parts of __V (immediates 0 and 1) and
 *      combines them with _mm256_<op>;
 *   2. extracts the two 128-bit parts of that result and combines them with
 *      _mm_<op>;
 *   3. swaps the two elements of the 128-bit value (shuffle 1, 0) and
 *      combines once more;
 *   4. returns element 0 of the final __m128d.
 * The expansion ends in a `return` without a trailing semicolon; the
 * invocation site supplies it.
 */
9592341825Sdim#define _mm512_mask_reduce_operator(op) \
9593341825Sdim  __m256d __t1 = _mm512_extractf64x4_pd(__V, 0); \
9594341825Sdim  __m256d __t2 = _mm512_extractf64x4_pd(__V, 1); \
9595341825Sdim  __m256d __t3 = _mm256_##op(__t1, __t2); \
9596341825Sdim  __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
9597341825Sdim  __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
9598341825Sdim  __m128d __t6 = _mm_##op(__t4, __t5); \
9599341825Sdim  __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
9600341825Sdim  __m128d __t8 = _mm_##op(__t6, __t7); \
9601353358Sdim  return __t8[0]
9602314564Sdim
/// Horizontally reduces the 512-bit double-precision vector \a __V with
///    the max_pd operation.
///
/// \param __V
///    The 512-bit vector of doubles to reduce.
/// \returns Element 0 of the __m128d vector produced by the reduction
///    helper.
9603341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512
9604341825Sdim_mm512_reduce_max_pd(__m512d __V) {
  /* The helper macro reads __V by name and supplies the return statement. */
9605341825Sdim  _mm512_mask_reduce_operator(max_pd);
9606341825Sdim}
9607314564Sdim
/// Horizontally reduces the 512-bit double-precision vector \a __V with
///    the min_pd operation.
///
/// \param __V
///    The 512-bit vector of doubles to reduce.
/// \returns Element 0 of the __m128d vector produced by the reduction
///    helper.
9608341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512
9609341825Sdim_mm512_reduce_min_pd(__m512d __V) {
  /* The helper macro reads __V by name and supplies the return statement. */
9610341825Sdim  _mm512_mask_reduce_operator(min_pd);
9611314564Sdim}
9612314564Sdim
/// Masked horizontal reduction of \a __V with the max_pd operation.
///
/// \param __M
///    8-bit mask passed to _mm512_mask_mov_pd to choose between \a __V and
///    the fill vector.
/// \param __V
///    The 512-bit vector of doubles to reduce.
/// \returns Element 0 of the __m128d vector produced by the reduction
///    helper.
9613341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512
9614341825Sdim_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
  /* Merge __V over a vector filled with negative infinity. Per the mask_mov
     intrinsic's documented merge behaviour, lanes not selected by __M take
     that fill value, so they do not raise the maximum. */
9615341825Sdim  __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
  /* The helper macro reads the updated __V and supplies the return. */
9616341825Sdim  _mm512_mask_reduce_operator(max_pd);
9617314564Sdim}
9618314564Sdim
/// Masked horizontal reduction of \a __V with the min_pd operation.
///
/// \param __M
///    8-bit mask passed to _mm512_mask_mov_pd to choose between \a __V and
///    the fill vector.
/// \param __V
///    The 512-bit vector of doubles to reduce.
/// \returns Element 0 of the __m128d vector produced by the reduction
///    helper.
9619341825Sdimstatic __inline__ double __DEFAULT_FN_ATTRS512
9620341825Sdim_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
  /* Merge __V over a vector filled with positive infinity. Per the mask_mov
     intrinsic's documented merge behaviour, lanes not selected by __M take
     that fill value, so they do not lower the minimum. */
9621341825Sdim  __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
  /* The helper macro reads the updated __V and supplies the return. */
9622341825Sdim  _mm512_mask_reduce_operator(min_pd);
9623314564Sdim}
/* Retire the previous definition of the reduction helper so that the name
   can be redefined immediately below. */
9624341825Sdim#undef _mm512_mask_reduce_operator
9625314564Sdim
/* Reduction helper used by the *_ps reduce functions that follow.
 *
 * Must be expanded inside a function that has a __m512 variable named __V
 * in scope. `op` is pasted after `_mm256_` and `_mm_` to name the pairwise
 * operation. The expansion:
 *   1. casts __V to __m512d so that _mm512_extractf64x4_pd can extract its
 *      two 256-bit parts (immediates 0 and 1), casts each part back to
 *      __m256, and combines them with _mm256_<op>;
 *   2. extracts the two 128-bit parts of that result and combines them with
 *      _mm_<op>;
 *   3. swaps the element pairs of the 128-bit value (shuffle 2, 3, 0, 1) and
 *      combines again;
 *   4. swaps adjacent elements (shuffle 1, 0, 3, 2) and combines once more;
 *   5. returns element 0 of the final __m128.
 * The expansion ends in a `return` without a trailing semicolon; the
 * invocation site supplies it.
 */
9626341825Sdim#define _mm512_mask_reduce_operator(op) \
9627341825Sdim  __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 0); \
9628341825Sdim  __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 1); \
9629341825Sdim  __m256 __t3 = _mm256_##op(__t1, __t2); \
9630341825Sdim  __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \
9631341825Sdim  __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \
9632341825Sdim  __m128 __t6 = _mm_##op(__t4, __t5); \
9633341825Sdim  __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
9634341825Sdim  __m128 __t8 = _mm_##op(__t6, __t7); \
9635341825Sdim  __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
9636341825Sdim  __m128 __t10 = _mm_##op(__t8, __t9); \
9637353358Sdim  return __t10[0]
9638341825Sdim
/// Horizontally reduces the 512-bit single-precision vector \a __V with
///    the max_ps operation.
///
/// \param __V
///    The 512-bit vector of floats to reduce.
/// \returns Element 0 of the __m128 vector produced by the reduction
///    helper.
9639341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512
9640341825Sdim_mm512_reduce_max_ps(__m512 __V) {
  /* The helper macro reads __V by name and supplies the return statement. */
9641341825Sdim  _mm512_mask_reduce_operator(max_ps);
9642314564Sdim}
9643314564Sdim
/// Horizontally reduces the 512-bit single-precision vector \a __V with
///    the min_ps operation.
///
/// \param __V
///    The 512-bit vector of floats to reduce.
/// \returns Element 0 of the __m128 vector produced by the reduction
///    helper.
9644341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512
9645341825Sdim_mm512_reduce_min_ps(__m512 __V) {
  /* The helper macro reads __V by name and supplies the return statement. */
9646341825Sdim  _mm512_mask_reduce_operator(min_ps);
9647314564Sdim}
9648314564Sdim
/// Masked horizontal reduction of \a __V with the max_ps operation.
///
/// \param __M
///    16-bit mask passed to _mm512_mask_mov_ps to choose between \a __V and
///    the fill vector.
/// \param __V
///    The 512-bit vector of floats to reduce.
/// \returns Element 0 of the __m128 vector produced by the reduction
///    helper.
9649341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512
9650341825Sdim_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
  /* Merge __V over a vector filled with negative infinity. Per the mask_mov
     intrinsic's documented merge behaviour, lanes not selected by __M take
     that fill value, so they do not raise the maximum. */
9651341825Sdim  __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
  /* The helper macro reads the updated __V and supplies the return. */
9652341825Sdim  _mm512_mask_reduce_operator(max_ps);
9653341825Sdim}
9654341825Sdim
/// Masked horizontal reduction of \a __V with the min_ps operation.
///
/// \param __M
///    16-bit mask passed to _mm512_mask_mov_ps to choose between \a __V and
///    the fill vector.
/// \param __V
///    The 512-bit vector of floats to reduce.
/// \returns Element 0 of the __m128 vector produced by the reduction
///    helper.
9655341825Sdimstatic __inline__ float __DEFAULT_FN_ATTRS512
9656314564Sdim_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
  /* Merge __V over a vector filled with positive infinity. Per the mask_mov
     intrinsic's documented merge behaviour, lanes not selected by __M take
     that fill value, so they do not lower the minimum. */
9657341825Sdim  __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
  /* The helper macro reads the updated __V and supplies the return. */
9658341825Sdim  _mm512_mask_reduce_operator(min_ps);
9659314564Sdim}
/* Final removal of the reduction helper macro: it is not redefined after
   this point, so the name is no longer defined for the rest of the header
   or for code that includes it. */
9660341825Sdim#undef _mm512_mask_reduce_operator
9661314564Sdim
9662360784Sdim/// Moves the least significant 32 bits of a vector of [16 x i32] to a
9663360784Sdim///    32-bit signed integer value.
9664360784Sdim///
9665360784Sdim/// \headerfile <x86intrin.h>
9666360784Sdim///
9667360784Sdim/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
9668360784Sdim///
9669360784Sdim/// \param __A
9670360784Sdim///    A vector of [16 x i32]. The least significant 32 bits are moved to the
9671360784Sdim///    destination.
9672360784Sdim/// \returns A 32-bit signed integer containing the moved value.
9673360784Sdimstatic __inline__ int __DEFAULT_FN_ATTRS512
9674360784Sdim_mm512_cvtsi512_si32(__m512i __A) {
  /* __m512i is declared with long long elements, so reinterpret the vector
     as sixteen ints before indexing. */
9675360784Sdim  __v16si __b = (__v16si)__A;
  /* Element 0 is the value returned. */
9676360784Sdim  return __b[0];
9677360784Sdim}
9678360784Sdim
/* Header epilogue: remove the function-attribute helper macros used by the
   definitions above so they are not left defined for code that includes
   this header, then close the __AVX512FINTRIN_H include guard. */
9679341825Sdim#undef __DEFAULT_FN_ATTRS512
9680341825Sdim#undef __DEFAULT_FN_ATTRS128
9681344779Sdim#undef __DEFAULT_FN_ATTRS
9682288943Sdim
9683341825Sdim#endif /* __AVX512FINTRIN_H */
9684