1/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __IMMINTRIN_H
10#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
11#endif
12
13#ifndef __AVX512FINTRIN_H
14#define __AVX512FINTRIN_H
15
16typedef char __v64qi __attribute__((__vector_size__(64)));
17typedef short __v32hi __attribute__((__vector_size__(64)));
18typedef double __v8df __attribute__((__vector_size__(64)));
19typedef float __v16sf __attribute__((__vector_size__(64)));
20typedef long long __v8di __attribute__((__vector_size__(64)));
21typedef int __v16si __attribute__((__vector_size__(64)));
22
23/* Unsigned types */
24typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
25typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
26typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
27typedef unsigned int __v16su __attribute__((__vector_size__(64)));
28
29typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
30typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
31typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
32
33typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
34typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
35typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));
36
37typedef unsigned char __mmask8;
38typedef unsigned short __mmask16;
39
40/* Rounding mode macros.  */
41#define _MM_FROUND_TO_NEAREST_INT   0x00
42#define _MM_FROUND_TO_NEG_INF       0x01
43#define _MM_FROUND_TO_POS_INF       0x02
44#define _MM_FROUND_TO_ZERO          0x03
45#define _MM_FROUND_CUR_DIRECTION    0x04
46
47/* Constants for integer comparison predicates */
48typedef enum {
49    _MM_CMPINT_EQ,      /* Equal */
50    _MM_CMPINT_LT,      /* Less than */
51    _MM_CMPINT_LE,      /* Less than or Equal */
52    _MM_CMPINT_UNUSED,
53    _MM_CMPINT_NE,      /* Not Equal */
54    _MM_CMPINT_NLT,     /* Not Less than */
55#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
56    _MM_CMPINT_NLE      /* Not Less than or Equal */
57#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
58} _MM_CMPINT_ENUM;
59
60typedef enum
61{
62  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
63  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
64  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
65  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
66  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
67  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
68  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
69  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
70  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
71  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
72  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
73  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
74  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
75  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
76  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
77  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
78  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
79  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
80  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
81  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
82  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
83  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
84  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
85  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
86  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
87  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
88  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
89  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
90  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
91  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
92  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
93  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
94  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
95  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
96  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
97  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
98  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
99  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
100  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
101  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
102  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
103  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
104  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
105  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
106  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
107  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
108  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
109  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
110  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
111  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
112  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
113  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
114  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
115  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
116  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
117  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
118  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
119  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
120  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
121  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
122  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
123  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
124  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
125  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
126  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
127  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
128  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
129  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
130  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
131  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
132  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
133  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
134  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
135  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
136  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
137  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
138  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
139  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
140  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
141  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
142  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
143  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
144  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
145  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
146  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
147  _MM_PERM_DDDD = 0xFF
148} _MM_PERM_ENUM;
149
150typedef enum
151{
152  _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
153  _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
154  _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
155  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
156} _MM_MANTISSA_NORM_ENUM;
157
158typedef enum
159{
160  _MM_MANT_SIGN_src,    /* sign = sign(SRC)     */
161  _MM_MANT_SIGN_zero,   /* sign = 0             */
162  _MM_MANT_SIGN_nan   /* DEST = NaN if sign(SRC) = 1 */
163} _MM_MANTISSA_SIGN_ENUM;
164
165/* Define the default attributes for the functions in this file. */
166#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
167#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
168#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
169
170/* Create vectors with repeated elements */
171
172static  __inline __m512i __DEFAULT_FN_ATTRS512
173_mm512_setzero_si512(void)
174{
175  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
176}
177
178#define _mm512_setzero_epi32 _mm512_setzero_si512
179
180static __inline__ __m512d __DEFAULT_FN_ATTRS512
181_mm512_undefined_pd(void)
182{
183  return (__m512d)__builtin_ia32_undef512();
184}
185
186static __inline__ __m512 __DEFAULT_FN_ATTRS512
187_mm512_undefined(void)
188{
189  return (__m512)__builtin_ia32_undef512();
190}
191
192static __inline__ __m512 __DEFAULT_FN_ATTRS512
193_mm512_undefined_ps(void)
194{
195  return (__m512)__builtin_ia32_undef512();
196}
197
198static __inline__ __m512i __DEFAULT_FN_ATTRS512
199_mm512_undefined_epi32(void)
200{
201  return (__m512i)__builtin_ia32_undef512();
202}
203
204static __inline__ __m512i __DEFAULT_FN_ATTRS512
205_mm512_broadcastd_epi32 (__m128i __A)
206{
207  return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
208                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
209}
210
211static __inline__ __m512i __DEFAULT_FN_ATTRS512
212_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
213{
214  return (__m512i)__builtin_ia32_selectd_512(__M,
215                                             (__v16si) _mm512_broadcastd_epi32(__A),
216                                             (__v16si) __O);
217}
218
219static __inline__ __m512i __DEFAULT_FN_ATTRS512
220_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
221{
222  return (__m512i)__builtin_ia32_selectd_512(__M,
223                                             (__v16si) _mm512_broadcastd_epi32(__A),
224                                             (__v16si) _mm512_setzero_si512());
225}
226
227static __inline__ __m512i __DEFAULT_FN_ATTRS512
228_mm512_broadcastq_epi64 (__m128i __A)
229{
230  return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
231                                          0, 0, 0, 0, 0, 0, 0, 0);
232}
233
234static __inline__ __m512i __DEFAULT_FN_ATTRS512
235_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
236{
237  return (__m512i)__builtin_ia32_selectq_512(__M,
238                                             (__v8di) _mm512_broadcastq_epi64(__A),
239                                             (__v8di) __O);
240
241}
242
243static __inline__ __m512i __DEFAULT_FN_ATTRS512
244_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
245{
246  return (__m512i)__builtin_ia32_selectq_512(__M,
247                                             (__v8di) _mm512_broadcastq_epi64(__A),
248                                             (__v8di) _mm512_setzero_si512());
249}
250
251
252static __inline __m512 __DEFAULT_FN_ATTRS512
253_mm512_setzero_ps(void)
254{
255  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
256                                 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
257}
258
259#define _mm512_setzero _mm512_setzero_ps
260
261static  __inline __m512d __DEFAULT_FN_ATTRS512
262_mm512_setzero_pd(void)
263{
264  return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
265}
266
267static __inline __m512 __DEFAULT_FN_ATTRS512
268_mm512_set1_ps(float __w)
269{
270  return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
271                                 __w, __w, __w, __w, __w, __w, __w, __w  };
272}
273
274static __inline __m512d __DEFAULT_FN_ATTRS512
275_mm512_set1_pd(double __w)
276{
277  return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
278}
279
280static __inline __m512i __DEFAULT_FN_ATTRS512
281_mm512_set1_epi8(char __w)
282{
283  return __extension__ (__m512i)(__v64qi){
284    __w, __w, __w, __w, __w, __w, __w, __w,
285    __w, __w, __w, __w, __w, __w, __w, __w,
286    __w, __w, __w, __w, __w, __w, __w, __w,
287    __w, __w, __w, __w, __w, __w, __w, __w,
288    __w, __w, __w, __w, __w, __w, __w, __w,
289    __w, __w, __w, __w, __w, __w, __w, __w,
290    __w, __w, __w, __w, __w, __w, __w, __w,
291    __w, __w, __w, __w, __w, __w, __w, __w  };
292}
293
294static __inline __m512i __DEFAULT_FN_ATTRS512
295_mm512_set1_epi16(short __w)
296{
297  return __extension__ (__m512i)(__v32hi){
298    __w, __w, __w, __w, __w, __w, __w, __w,
299    __w, __w, __w, __w, __w, __w, __w, __w,
300    __w, __w, __w, __w, __w, __w, __w, __w,
301    __w, __w, __w, __w, __w, __w, __w, __w };
302}
303
304static __inline __m512i __DEFAULT_FN_ATTRS512
305_mm512_set1_epi32(int __s)
306{
307  return __extension__ (__m512i)(__v16si){
308    __s, __s, __s, __s, __s, __s, __s, __s,
309    __s, __s, __s, __s, __s, __s, __s, __s };
310}
311
312static __inline __m512i __DEFAULT_FN_ATTRS512
313_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
314{
315  return (__m512i)__builtin_ia32_selectd_512(__M,
316                                             (__v16si)_mm512_set1_epi32(__A),
317                                             (__v16si)_mm512_setzero_si512());
318}
319
320static __inline __m512i __DEFAULT_FN_ATTRS512
321_mm512_set1_epi64(long long __d)
322{
323  return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
324}
325
326static __inline __m512i __DEFAULT_FN_ATTRS512
327_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
328{
329  return (__m512i)__builtin_ia32_selectq_512(__M,
330                                             (__v8di)_mm512_set1_epi64(__A),
331                                             (__v8di)_mm512_setzero_si512());
332}
333
334static __inline__ __m512 __DEFAULT_FN_ATTRS512
335_mm512_broadcastss_ps(__m128 __A)
336{
337  return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
338                                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
339}
340
341static __inline __m512i __DEFAULT_FN_ATTRS512
342_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
343{
344  return __extension__ (__m512i)(__v16si)
345   { __D, __C, __B, __A, __D, __C, __B, __A,
346     __D, __C, __B, __A, __D, __C, __B, __A };
347}
348
349static __inline __m512i __DEFAULT_FN_ATTRS512
350_mm512_set4_epi64 (long long __A, long long __B, long long __C,
351       long long __D)
352{
353  return __extension__ (__m512i) (__v8di)
354   { __D, __C, __B, __A, __D, __C, __B, __A };
355}
356
357static __inline __m512d __DEFAULT_FN_ATTRS512
358_mm512_set4_pd (double __A, double __B, double __C, double __D)
359{
360  return __extension__ (__m512d)
361   { __D, __C, __B, __A, __D, __C, __B, __A };
362}
363
364static __inline __m512 __DEFAULT_FN_ATTRS512
365_mm512_set4_ps (float __A, float __B, float __C, float __D)
366{
367  return __extension__ (__m512)
368   { __D, __C, __B, __A, __D, __C, __B, __A,
369     __D, __C, __B, __A, __D, __C, __B, __A };
370}
371
372#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
373  _mm512_set4_epi32((e3),(e2),(e1),(e0))
374
375#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
376  _mm512_set4_epi64((e3),(e2),(e1),(e0))
377
378#define _mm512_setr4_pd(e0,e1,e2,e3)                \
379  _mm512_set4_pd((e3),(e2),(e1),(e0))
380
381#define _mm512_setr4_ps(e0,e1,e2,e3)                \
382  _mm512_set4_ps((e3),(e2),(e1),(e0))
383
384static __inline__ __m512d __DEFAULT_FN_ATTRS512
385_mm512_broadcastsd_pd(__m128d __A)
386{
387  return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
388                                          0, 0, 0, 0, 0, 0, 0, 0);
389}
390
391/* Cast between vector types */
392
393static __inline __m512d __DEFAULT_FN_ATTRS512
394_mm512_castpd256_pd512(__m256d __a)
395{
396  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
397}
398
399static __inline __m512 __DEFAULT_FN_ATTRS512
400_mm512_castps256_ps512(__m256 __a)
401{
402  return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
403                                          -1, -1, -1, -1, -1, -1, -1, -1);
404}
405
406static __inline __m128d __DEFAULT_FN_ATTRS512
407_mm512_castpd512_pd128(__m512d __a)
408{
409  return __builtin_shufflevector(__a, __a, 0, 1);
410}
411
412static __inline __m256d __DEFAULT_FN_ATTRS512
413_mm512_castpd512_pd256 (__m512d __A)
414{
415  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
416}
417
418static __inline __m128 __DEFAULT_FN_ATTRS512
419_mm512_castps512_ps128(__m512 __a)
420{
421  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
422}
423
424static __inline __m256 __DEFAULT_FN_ATTRS512
425_mm512_castps512_ps256 (__m512 __A)
426{
427  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
428}
429
430static __inline __m512 __DEFAULT_FN_ATTRS512
431_mm512_castpd_ps (__m512d __A)
432{
433  return (__m512) (__A);
434}
435
436static __inline __m512i __DEFAULT_FN_ATTRS512
437_mm512_castpd_si512 (__m512d __A)
438{
439  return (__m512i) (__A);
440}
441
442static __inline__ __m512d __DEFAULT_FN_ATTRS512
443_mm512_castpd128_pd512 (__m128d __A)
444{
445  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
446}
447
448static __inline __m512d __DEFAULT_FN_ATTRS512
449_mm512_castps_pd (__m512 __A)
450{
451  return (__m512d) (__A);
452}
453
454static __inline __m512i __DEFAULT_FN_ATTRS512
455_mm512_castps_si512 (__m512 __A)
456{
457  return (__m512i) (__A);
458}
459
460static __inline__ __m512 __DEFAULT_FN_ATTRS512
461_mm512_castps128_ps512 (__m128 __A)
462{
463    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
464}
465
466static __inline__ __m512i __DEFAULT_FN_ATTRS512
467_mm512_castsi128_si512 (__m128i __A)
468{
469   return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
470}
471
472static __inline__ __m512i __DEFAULT_FN_ATTRS512
473_mm512_castsi256_si512 (__m256i __A)
474{
475   return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
476}
477
478static __inline __m512 __DEFAULT_FN_ATTRS512
479_mm512_castsi512_ps (__m512i __A)
480{
481  return (__m512) (__A);
482}
483
484static __inline __m512d __DEFAULT_FN_ATTRS512
485_mm512_castsi512_pd (__m512i __A)
486{
487  return (__m512d) (__A);
488}
489
490static __inline __m128i __DEFAULT_FN_ATTRS512
491_mm512_castsi512_si128 (__m512i __A)
492{
493  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
494}
495
496static __inline __m256i __DEFAULT_FN_ATTRS512
497_mm512_castsi512_si256 (__m512i __A)
498{
499  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
500}
501
502static __inline__ __mmask16 __DEFAULT_FN_ATTRS
503_mm512_int2mask(int __a)
504{
505  return (__mmask16)__a;
506}
507
508static __inline__ int __DEFAULT_FN_ATTRS
509_mm512_mask2int(__mmask16 __a)
510{
511  return (int)__a;
512}
513
514/// Constructs a 512-bit floating-point vector of [8 x double] from a
515///    128-bit floating-point vector of [2 x double]. The lower 128 bits
516///    contain the value of the source vector. The upper 384 bits are set
517///    to zero.
518///
519/// \headerfile <x86intrin.h>
520///
521/// This intrinsic has no corresponding instruction.
522///
523/// \param __a
524///    A 128-bit vector of [2 x double].
525/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
526///    contain the value of the parameter. The upper 384 bits are set to zero.
527static __inline __m512d __DEFAULT_FN_ATTRS512
528_mm512_zextpd128_pd512(__m128d __a)
529{
530  return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
531}
532
533/// Constructs a 512-bit floating-point vector of [8 x double] from a
534///    256-bit floating-point vector of [4 x double]. The lower 256 bits
535///    contain the value of the source vector. The upper 256 bits are set
536///    to zero.
537///
538/// \headerfile <x86intrin.h>
539///
540/// This intrinsic has no corresponding instruction.
541///
542/// \param __a
543///    A 256-bit vector of [4 x double].
544/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
545///    contain the value of the parameter. The upper 256 bits are set to zero.
546static __inline __m512d __DEFAULT_FN_ATTRS512
547_mm512_zextpd256_pd512(__m256d __a)
548{
549  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
550}
551
552/// Constructs a 512-bit floating-point vector of [16 x float] from a
553///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
554///    the value of the source vector. The upper 384 bits are set to zero.
555///
556/// \headerfile <x86intrin.h>
557///
558/// This intrinsic has no corresponding instruction.
559///
560/// \param __a
561///    A 128-bit vector of [4 x float].
562/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
563///    contain the value of the parameter. The upper 384 bits are set to zero.
564static __inline __m512 __DEFAULT_FN_ATTRS512
565_mm512_zextps128_ps512(__m128 __a)
566{
567  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
568}
569
570/// Constructs a 512-bit floating-point vector of [16 x float] from a
571///    256-bit floating-point vector of [8 x float]. The lower 256 bits contain
572///    the value of the source vector. The upper 256 bits are set to zero.
573///
574/// \headerfile <x86intrin.h>
575///
576/// This intrinsic has no corresponding instruction.
577///
578/// \param __a
579///    A 256-bit vector of [8 x float].
580/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
581///    contain the value of the parameter. The upper 256 bits are set to zero.
582static __inline __m512 __DEFAULT_FN_ATTRS512
583_mm512_zextps256_ps512(__m256 __a)
584{
585  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
586}
587
588/// Constructs a 512-bit integer vector from a 128-bit integer vector.
589///    The lower 128 bits contain the value of the source vector. The upper
590///    384 bits are set to zero.
591///
592/// \headerfile <x86intrin.h>
593///
594/// This intrinsic has no corresponding instruction.
595///
596/// \param __a
597///    A 128-bit integer vector.
598/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
599///    the parameter. The upper 384 bits are set to zero.
600static __inline __m512i __DEFAULT_FN_ATTRS512
601_mm512_zextsi128_si512(__m128i __a)
602{
603  return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
604}
605
606/// Constructs a 512-bit integer vector from a 256-bit integer vector.
607///    The lower 256 bits contain the value of the source vector. The upper
608///    256 bits are set to zero.
609///
610/// \headerfile <x86intrin.h>
611///
612/// This intrinsic has no corresponding instruction.
613///
614/// \param __a
615///    A 256-bit integer vector.
616/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
617///    the parameter. The upper 256 bits are set to zero.
618static __inline __m512i __DEFAULT_FN_ATTRS512
619_mm512_zextsi256_si512(__m256i __a)
620{
621  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
622}
623
624/* Bitwise operators */
625static __inline__ __m512i __DEFAULT_FN_ATTRS512
626_mm512_and_epi32(__m512i __a, __m512i __b)
627{
628  return (__m512i)((__v16su)__a & (__v16su)__b);
629}
630
631static __inline__ __m512i __DEFAULT_FN_ATTRS512
632_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
633{
634  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
635                (__v16si) _mm512_and_epi32(__a, __b),
636                (__v16si) __src);
637}
638
639static __inline__ __m512i __DEFAULT_FN_ATTRS512
640_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
641{
642  return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
643                                         __k, __a, __b);
644}
645
646static __inline__ __m512i __DEFAULT_FN_ATTRS512
647_mm512_and_epi64(__m512i __a, __m512i __b)
648{
649  return (__m512i)((__v8du)__a & (__v8du)__b);
650}
651
652static __inline__ __m512i __DEFAULT_FN_ATTRS512
653_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
654{
655    return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
656                (__v8di) _mm512_and_epi64(__a, __b),
657                (__v8di) __src);
658}
659
660static __inline__ __m512i __DEFAULT_FN_ATTRS512
661_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
662{
663  return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
664                                         __k, __a, __b);
665}
666
667static __inline__ __m512i __DEFAULT_FN_ATTRS512
668_mm512_andnot_si512 (__m512i __A, __m512i __B)
669{
670  return (__m512i)(~(__v8du)__A & (__v8du)__B);
671}
672
673static __inline__ __m512i __DEFAULT_FN_ATTRS512
674_mm512_andnot_epi32 (__m512i __A, __m512i __B)
675{
676  return (__m512i)(~(__v16su)__A & (__v16su)__B);
677}
678
679static __inline__ __m512i __DEFAULT_FN_ATTRS512
680_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
681{
682  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
683                                         (__v16si)_mm512_andnot_epi32(__A, __B),
684                                         (__v16si)__W);
685}
686
687static __inline__ __m512i __DEFAULT_FN_ATTRS512
688_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
689{
690  return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
691                                           __U, __A, __B);
692}
693
694static __inline__ __m512i __DEFAULT_FN_ATTRS512
695_mm512_andnot_epi64(__m512i __A, __m512i __B)
696{
697  return (__m512i)(~(__v8du)__A & (__v8du)__B);
698}
699
700static __inline__ __m512i __DEFAULT_FN_ATTRS512
701_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
702{
703  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
704                                          (__v8di)_mm512_andnot_epi64(__A, __B),
705                                          (__v8di)__W);
706}
707
708static __inline__ __m512i __DEFAULT_FN_ATTRS512
709_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
710{
711  return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
712                                           __U, __A, __B);
713}
714
715static __inline__ __m512i __DEFAULT_FN_ATTRS512
716_mm512_or_epi32(__m512i __a, __m512i __b)
717{
718  return (__m512i)((__v16su)__a | (__v16su)__b);
719}
720
721static __inline__ __m512i __DEFAULT_FN_ATTRS512
722_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
723{
724  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
725                                             (__v16si)_mm512_or_epi32(__a, __b),
726                                             (__v16si)__src);
727}
728
729static __inline__ __m512i __DEFAULT_FN_ATTRS512
730_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
731{
732  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
733}
734
735static __inline__ __m512i __DEFAULT_FN_ATTRS512
736_mm512_or_epi64(__m512i __a, __m512i __b)
737{
738  return (__m512i)((__v8du)__a | (__v8du)__b);
739}
740
741static __inline__ __m512i __DEFAULT_FN_ATTRS512
742_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
743{
744  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
745                                             (__v8di)_mm512_or_epi64(__a, __b),
746                                             (__v8di)__src);
747}
748
749static __inline__ __m512i __DEFAULT_FN_ATTRS512
750_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
751{
752  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
753}
754
755static __inline__ __m512i __DEFAULT_FN_ATTRS512
756_mm512_xor_epi32(__m512i __a, __m512i __b)
757{
758  return (__m512i)((__v16su)__a ^ (__v16su)__b);
759}
760
761static __inline__ __m512i __DEFAULT_FN_ATTRS512
762_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
763{
764  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
765                                            (__v16si)_mm512_xor_epi32(__a, __b),
766                                            (__v16si)__src);
767}
768
769static __inline__ __m512i __DEFAULT_FN_ATTRS512
770_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
771{
772  return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
773}
774
775static __inline__ __m512i __DEFAULT_FN_ATTRS512
776_mm512_xor_epi64(__m512i __a, __m512i __b)
777{
778  return (__m512i)((__v8du)__a ^ (__v8du)__b);
779}
780
781static __inline__ __m512i __DEFAULT_FN_ATTRS512
782_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
783{
784  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
785                                             (__v8di)_mm512_xor_epi64(__a, __b),
786                                             (__v8di)__src);
787}
788
789static __inline__ __m512i __DEFAULT_FN_ATTRS512
790_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
791{
792  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
793}
794
795static __inline__ __m512i __DEFAULT_FN_ATTRS512
796_mm512_and_si512(__m512i __a, __m512i __b)
797{
798  return (__m512i)((__v8du)__a & (__v8du)__b);
799}
800
801static __inline__ __m512i __DEFAULT_FN_ATTRS512
802_mm512_or_si512(__m512i __a, __m512i __b)
803{
804  return (__m512i)((__v8du)__a | (__v8du)__b);
805}
806
807static __inline__ __m512i __DEFAULT_FN_ATTRS512
808_mm512_xor_si512(__m512i __a, __m512i __b)
809{
810  return (__m512i)((__v8du)__a ^ (__v8du)__b);
811}
812
813/* Arithmetic */
814
815static __inline __m512d __DEFAULT_FN_ATTRS512
816_mm512_add_pd(__m512d __a, __m512d __b)
817{
818  return (__m512d)((__v8df)__a + (__v8df)__b);
819}
820
821static __inline __m512 __DEFAULT_FN_ATTRS512
822_mm512_add_ps(__m512 __a, __m512 __b)
823{
824  return (__m512)((__v16sf)__a + (__v16sf)__b);
825}
826
827static __inline __m512d __DEFAULT_FN_ATTRS512
828_mm512_mul_pd(__m512d __a, __m512d __b)
829{
830  return (__m512d)((__v8df)__a * (__v8df)__b);
831}
832
833static __inline __m512 __DEFAULT_FN_ATTRS512
834_mm512_mul_ps(__m512 __a, __m512 __b)
835{
836  return (__m512)((__v16sf)__a * (__v16sf)__b);
837}
838
839static __inline __m512d __DEFAULT_FN_ATTRS512
840_mm512_sub_pd(__m512d __a, __m512d __b)
841{
842  return (__m512d)((__v8df)__a - (__v8df)__b);
843}
844
845static __inline __m512 __DEFAULT_FN_ATTRS512
846_mm512_sub_ps(__m512 __a, __m512 __b)
847{
848  return (__m512)((__v16sf)__a - (__v16sf)__b);
849}
850
851static __inline__ __m512i __DEFAULT_FN_ATTRS512
852_mm512_add_epi64 (__m512i __A, __m512i __B)
853{
854  return (__m512i) ((__v8du) __A + (__v8du) __B);
855}
856
857static __inline__ __m512i __DEFAULT_FN_ATTRS512
858_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
859{
860  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
861                                             (__v8di)_mm512_add_epi64(__A, __B),
862                                             (__v8di)__W);
863}
864
865static __inline__ __m512i __DEFAULT_FN_ATTRS512
866_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
867{
868  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
869                                             (__v8di)_mm512_add_epi64(__A, __B),
870                                             (__v8di)_mm512_setzero_si512());
871}
872
873static __inline__ __m512i __DEFAULT_FN_ATTRS512
874_mm512_sub_epi64 (__m512i __A, __m512i __B)
875{
876  return (__m512i) ((__v8du) __A - (__v8du) __B);
877}
878
879static __inline__ __m512i __DEFAULT_FN_ATTRS512
880_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
881{
882  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
883                                             (__v8di)_mm512_sub_epi64(__A, __B),
884                                             (__v8di)__W);
885}
886
887static __inline__ __m512i __DEFAULT_FN_ATTRS512
888_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
889{
890  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
891                                             (__v8di)_mm512_sub_epi64(__A, __B),
892                                             (__v8di)_mm512_setzero_si512());
893}
894
895static __inline__ __m512i __DEFAULT_FN_ATTRS512
896_mm512_add_epi32 (__m512i __A, __m512i __B)
897{
898  return (__m512i) ((__v16su) __A + (__v16su) __B);
899}
900
901static __inline__ __m512i __DEFAULT_FN_ATTRS512
902_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
903{
904  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
905                                             (__v16si)_mm512_add_epi32(__A, __B),
906                                             (__v16si)__W);
907}
908
909static __inline__ __m512i __DEFAULT_FN_ATTRS512
910_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
911{
912  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
913                                             (__v16si)_mm512_add_epi32(__A, __B),
914                                             (__v16si)_mm512_setzero_si512());
915}
916
917static __inline__ __m512i __DEFAULT_FN_ATTRS512
918_mm512_sub_epi32 (__m512i __A, __m512i __B)
919{
920  return (__m512i) ((__v16su) __A - (__v16su) __B);
921}
922
923static __inline__ __m512i __DEFAULT_FN_ATTRS512
924_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
925{
926  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
927                                             (__v16si)_mm512_sub_epi32(__A, __B),
928                                             (__v16si)__W);
929}
930
931static __inline__ __m512i __DEFAULT_FN_ATTRS512
932_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
933{
934  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
935                                             (__v16si)_mm512_sub_epi32(__A, __B),
936                                             (__v16si)_mm512_setzero_si512());
937}
938
939#define _mm512_max_round_pd(A, B, R) \
940  (__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
941                                   (__v8df)(__m512d)(B), (int)(R))
942
943#define _mm512_mask_max_round_pd(W, U, A, B, R) \
944  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
945                                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
946                                   (__v8df)(W))
947
948#define _mm512_maskz_max_round_pd(U, A, B, R) \
949  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
950                                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
951                                   (__v8df)_mm512_setzero_pd())
952
953static  __inline__ __m512d __DEFAULT_FN_ATTRS512
954_mm512_max_pd(__m512d __A, __m512d __B)
955{
956  return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
957                                           _MM_FROUND_CUR_DIRECTION);
958}
959
960static __inline__ __m512d __DEFAULT_FN_ATTRS512
961_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
962{
963  return (__m512d)__builtin_ia32_selectpd_512(__U,
964                                              (__v8df)_mm512_max_pd(__A, __B),
965                                              (__v8df)__W);
966}
967
968static __inline__ __m512d __DEFAULT_FN_ATTRS512
969_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
970{
971  return (__m512d)__builtin_ia32_selectpd_512(__U,
972                                              (__v8df)_mm512_max_pd(__A, __B),
973                                              (__v8df)_mm512_setzero_pd());
974}
975
976#define _mm512_max_round_ps(A, B, R) \
977  (__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
978                                  (__v16sf)(__m512)(B), (int)(R))
979
980#define _mm512_mask_max_round_ps(W, U, A, B, R) \
981  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
982                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
983                                  (__v16sf)(W))
984
985#define _mm512_maskz_max_round_ps(U, A, B, R) \
986  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
987                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
988                                  (__v16sf)_mm512_setzero_ps())
989
990static  __inline__ __m512 __DEFAULT_FN_ATTRS512
991_mm512_max_ps(__m512 __A, __m512 __B)
992{
993  return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
994                                          _MM_FROUND_CUR_DIRECTION);
995}
996
997static __inline__ __m512 __DEFAULT_FN_ATTRS512
998_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
999{
1000  return (__m512)__builtin_ia32_selectps_512(__U,
1001                                             (__v16sf)_mm512_max_ps(__A, __B),
1002                                             (__v16sf)__W);
1003}
1004
1005static __inline__ __m512 __DEFAULT_FN_ATTRS512
1006_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1007{
1008  return (__m512)__builtin_ia32_selectps_512(__U,
1009                                             (__v16sf)_mm512_max_ps(__A, __B),
1010                                             (__v16sf)_mm512_setzero_ps());
1011}
1012
1013static __inline__ __m128 __DEFAULT_FN_ATTRS128
1014_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1015  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1016                (__v4sf) __B,
1017                (__v4sf) __W,
1018                (__mmask8) __U,
1019                _MM_FROUND_CUR_DIRECTION);
1020}
1021
1022static __inline__ __m128 __DEFAULT_FN_ATTRS128
1023_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1024  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1025                (__v4sf) __B,
1026                (__v4sf)  _mm_setzero_ps (),
1027                (__mmask8) __U,
1028                _MM_FROUND_CUR_DIRECTION);
1029}
1030
1031#define _mm_max_round_ss(A, B, R) \
1032  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1033                                          (__v4sf)(__m128)(B), \
1034                                          (__v4sf)_mm_setzero_ps(), \
1035                                          (__mmask8)-1, (int)(R))
1036
1037#define _mm_mask_max_round_ss(W, U, A, B, R) \
1038  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1039                                          (__v4sf)(__m128)(B), \
1040                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
1041                                          (int)(R))
1042
1043#define _mm_maskz_max_round_ss(U, A, B, R) \
1044  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1045                                          (__v4sf)(__m128)(B), \
1046                                          (__v4sf)_mm_setzero_ps(), \
1047                                          (__mmask8)(U), (int)(R))
1048
1049static __inline__ __m128d __DEFAULT_FN_ATTRS128
1050_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1051  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1052                (__v2df) __B,
1053                (__v2df) __W,
1054                (__mmask8) __U,
1055                _MM_FROUND_CUR_DIRECTION);
1056}
1057
1058static __inline__ __m128d __DEFAULT_FN_ATTRS128
1059_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1060  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1061                (__v2df) __B,
1062                (__v2df)  _mm_setzero_pd (),
1063                (__mmask8) __U,
1064                _MM_FROUND_CUR_DIRECTION);
1065}
1066
1067#define _mm_max_round_sd(A, B, R) \
1068  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1069                                           (__v2df)(__m128d)(B), \
1070                                           (__v2df)_mm_setzero_pd(), \
1071                                           (__mmask8)-1, (int)(R))
1072
1073#define _mm_mask_max_round_sd(W, U, A, B, R) \
1074  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1075                                           (__v2df)(__m128d)(B), \
1076                                           (__v2df)(__m128d)(W), \
1077                                           (__mmask8)(U), (int)(R))
1078
1079#define _mm_maskz_max_round_sd(U, A, B, R) \
1080  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1081                                           (__v2df)(__m128d)(B), \
1082                                           (__v2df)_mm_setzero_pd(), \
1083                                           (__mmask8)(U), (int)(R))
1084
1085static __inline __m512i
1086__DEFAULT_FN_ATTRS512
1087_mm512_max_epi32(__m512i __A, __m512i __B)
1088{
1089  return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
1090}
1091
1092static __inline__ __m512i __DEFAULT_FN_ATTRS512
1093_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1094{
1095  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1096                                            (__v16si)_mm512_max_epi32(__A, __B),
1097                                            (__v16si)__W);
1098}
1099
1100static __inline__ __m512i __DEFAULT_FN_ATTRS512
1101_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1102{
1103  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1104                                            (__v16si)_mm512_max_epi32(__A, __B),
1105                                            (__v16si)_mm512_setzero_si512());
1106}
1107
1108static __inline __m512i __DEFAULT_FN_ATTRS512
1109_mm512_max_epu32(__m512i __A, __m512i __B)
1110{
1111  return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
1112}
1113
1114static __inline__ __m512i __DEFAULT_FN_ATTRS512
1115_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1116{
1117  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1118                                            (__v16si)_mm512_max_epu32(__A, __B),
1119                                            (__v16si)__W);
1120}
1121
1122static __inline__ __m512i __DEFAULT_FN_ATTRS512
1123_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1124{
1125  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1126                                            (__v16si)_mm512_max_epu32(__A, __B),
1127                                            (__v16si)_mm512_setzero_si512());
1128}
1129
1130static __inline __m512i __DEFAULT_FN_ATTRS512
1131_mm512_max_epi64(__m512i __A, __m512i __B)
1132{
1133  return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
1134}
1135
1136static __inline__ __m512i __DEFAULT_FN_ATTRS512
1137_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1138{
1139  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1140                                             (__v8di)_mm512_max_epi64(__A, __B),
1141                                             (__v8di)__W);
1142}
1143
1144static __inline__ __m512i __DEFAULT_FN_ATTRS512
1145_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1146{
1147  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1148                                             (__v8di)_mm512_max_epi64(__A, __B),
1149                                             (__v8di)_mm512_setzero_si512());
1150}
1151
1152static __inline __m512i __DEFAULT_FN_ATTRS512
1153_mm512_max_epu64(__m512i __A, __m512i __B)
1154{
1155  return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
1156}
1157
1158static __inline__ __m512i __DEFAULT_FN_ATTRS512
1159_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1160{
1161  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1162                                             (__v8di)_mm512_max_epu64(__A, __B),
1163                                             (__v8di)__W);
1164}
1165
1166static __inline__ __m512i __DEFAULT_FN_ATTRS512
1167_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1168{
1169  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1170                                             (__v8di)_mm512_max_epu64(__A, __B),
1171                                             (__v8di)_mm512_setzero_si512());
1172}
1173
1174#define _mm512_min_round_pd(A, B, R) \
1175  (__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
1176                                   (__v8df)(__m512d)(B), (int)(R))
1177
1178#define _mm512_mask_min_round_pd(W, U, A, B, R) \
1179  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1180                                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1181                                   (__v8df)(W))
1182
1183#define _mm512_maskz_min_round_pd(U, A, B, R) \
1184  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1185                                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1186                                   (__v8df)_mm512_setzero_pd())
1187
1188static  __inline__ __m512d __DEFAULT_FN_ATTRS512
1189_mm512_min_pd(__m512d __A, __m512d __B)
1190{
1191  return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1192                                           _MM_FROUND_CUR_DIRECTION);
1193}
1194
1195static __inline__ __m512d __DEFAULT_FN_ATTRS512
1196_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1197{
1198  return (__m512d)__builtin_ia32_selectpd_512(__U,
1199                                              (__v8df)_mm512_min_pd(__A, __B),
1200                                              (__v8df)__W);
1201}
1202
1203static __inline__ __m512d __DEFAULT_FN_ATTRS512
1204_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1205{
1206  return (__m512d)__builtin_ia32_selectpd_512(__U,
1207                                              (__v8df)_mm512_min_pd(__A, __B),
1208                                              (__v8df)_mm512_setzero_pd());
1209}
1210
1211#define _mm512_min_round_ps(A, B, R) \
1212  (__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
1213                                  (__v16sf)(__m512)(B), (int)(R))
1214
1215#define _mm512_mask_min_round_ps(W, U, A, B, R) \
1216  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1217                                  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1218                                  (__v16sf)(W))
1219
1220#define _mm512_maskz_min_round_ps(U, A, B, R) \
1221  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1222                                  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1223                                  (__v16sf)_mm512_setzero_ps())
1224
1225static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1226_mm512_min_ps(__m512 __A, __m512 __B)
1227{
1228  return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1229                                          _MM_FROUND_CUR_DIRECTION);
1230}
1231
1232static __inline__ __m512 __DEFAULT_FN_ATTRS512
1233_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1234{
1235  return (__m512)__builtin_ia32_selectps_512(__U,
1236                                             (__v16sf)_mm512_min_ps(__A, __B),
1237                                             (__v16sf)__W);
1238}
1239
1240static __inline__ __m512 __DEFAULT_FN_ATTRS512
1241_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1242{
1243  return (__m512)__builtin_ia32_selectps_512(__U,
1244                                             (__v16sf)_mm512_min_ps(__A, __B),
1245                                             (__v16sf)_mm512_setzero_ps());
1246}
1247
1248static __inline__ __m128 __DEFAULT_FN_ATTRS128
1249_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1250  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1251                (__v4sf) __B,
1252                (__v4sf) __W,
1253                (__mmask8) __U,
1254                _MM_FROUND_CUR_DIRECTION);
1255}
1256
1257static __inline__ __m128 __DEFAULT_FN_ATTRS128
1258_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1259  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1260                (__v4sf) __B,
1261                (__v4sf)  _mm_setzero_ps (),
1262                (__mmask8) __U,
1263                _MM_FROUND_CUR_DIRECTION);
1264}
1265
1266#define _mm_min_round_ss(A, B, R) \
1267  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1268                                          (__v4sf)(__m128)(B), \
1269                                          (__v4sf)_mm_setzero_ps(), \
1270                                          (__mmask8)-1, (int)(R))
1271
1272#define _mm_mask_min_round_ss(W, U, A, B, R) \
1273  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1274                                          (__v4sf)(__m128)(B), \
1275                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
1276                                          (int)(R))
1277
1278#define _mm_maskz_min_round_ss(U, A, B, R) \
1279  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1280                                          (__v4sf)(__m128)(B), \
1281                                          (__v4sf)_mm_setzero_ps(), \
1282                                          (__mmask8)(U), (int)(R))
1283
1284static __inline__ __m128d __DEFAULT_FN_ATTRS128
1285_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1286  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1287                (__v2df) __B,
1288                (__v2df) __W,
1289                (__mmask8) __U,
1290                _MM_FROUND_CUR_DIRECTION);
1291}
1292
1293static __inline__ __m128d __DEFAULT_FN_ATTRS128
1294_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1295  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1296                (__v2df) __B,
1297                (__v2df)  _mm_setzero_pd (),
1298                (__mmask8) __U,
1299                _MM_FROUND_CUR_DIRECTION);
1300}
1301
1302#define _mm_min_round_sd(A, B, R) \
1303  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1304                                           (__v2df)(__m128d)(B), \
1305                                           (__v2df)_mm_setzero_pd(), \
1306                                           (__mmask8)-1, (int)(R))
1307
1308#define _mm_mask_min_round_sd(W, U, A, B, R) \
1309  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1310                                           (__v2df)(__m128d)(B), \
1311                                           (__v2df)(__m128d)(W), \
1312                                           (__mmask8)(U), (int)(R))
1313
1314#define _mm_maskz_min_round_sd(U, A, B, R) \
1315  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1316                                           (__v2df)(__m128d)(B), \
1317                                           (__v2df)_mm_setzero_pd(), \
1318                                           (__mmask8)(U), (int)(R))
1319
1320static __inline __m512i
1321__DEFAULT_FN_ATTRS512
1322_mm512_min_epi32(__m512i __A, __m512i __B)
1323{
1324  return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
1325}
1326
1327static __inline__ __m512i __DEFAULT_FN_ATTRS512
1328_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1329{
1330  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1331                                            (__v16si)_mm512_min_epi32(__A, __B),
1332                                            (__v16si)__W);
1333}
1334
1335static __inline__ __m512i __DEFAULT_FN_ATTRS512
1336_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1337{
1338  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1339                                            (__v16si)_mm512_min_epi32(__A, __B),
1340                                            (__v16si)_mm512_setzero_si512());
1341}
1342
1343static __inline __m512i __DEFAULT_FN_ATTRS512
1344_mm512_min_epu32(__m512i __A, __m512i __B)
1345{
1346  return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
1347}
1348
1349static __inline__ __m512i __DEFAULT_FN_ATTRS512
1350_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1351{
1352  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1353                                            (__v16si)_mm512_min_epu32(__A, __B),
1354                                            (__v16si)__W);
1355}
1356
1357static __inline__ __m512i __DEFAULT_FN_ATTRS512
1358_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1359{
1360  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1361                                            (__v16si)_mm512_min_epu32(__A, __B),
1362                                            (__v16si)_mm512_setzero_si512());
1363}
1364
1365static __inline __m512i __DEFAULT_FN_ATTRS512
1366_mm512_min_epi64(__m512i __A, __m512i __B)
1367{
1368  return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
1369}
1370
1371static __inline__ __m512i __DEFAULT_FN_ATTRS512
1372_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1373{
1374  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1375                                             (__v8di)_mm512_min_epi64(__A, __B),
1376                                             (__v8di)__W);
1377}
1378
1379static __inline__ __m512i __DEFAULT_FN_ATTRS512
1380_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1381{
1382  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1383                                             (__v8di)_mm512_min_epi64(__A, __B),
1384                                             (__v8di)_mm512_setzero_si512());
1385}
1386
1387static __inline __m512i __DEFAULT_FN_ATTRS512
1388_mm512_min_epu64(__m512i __A, __m512i __B)
1389{
1390  return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
1391}
1392
1393static __inline__ __m512i __DEFAULT_FN_ATTRS512
1394_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1395{
1396  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1397                                             (__v8di)_mm512_min_epu64(__A, __B),
1398                                             (__v8di)__W);
1399}
1400
1401static __inline__ __m512i __DEFAULT_FN_ATTRS512
1402_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1403{
1404  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1405                                             (__v8di)_mm512_min_epu64(__A, __B),
1406                                             (__v8di)_mm512_setzero_si512());
1407}
1408
1409static __inline __m512i __DEFAULT_FN_ATTRS512
1410_mm512_mul_epi32(__m512i __X, __m512i __Y)
1411{
1412  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1413}
1414
1415static __inline __m512i __DEFAULT_FN_ATTRS512
1416_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1417{
1418  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1419                                             (__v8di)_mm512_mul_epi32(__X, __Y),
1420                                             (__v8di)__W);
1421}
1422
1423static __inline __m512i __DEFAULT_FN_ATTRS512
1424_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
1425{
1426  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1427                                             (__v8di)_mm512_mul_epi32(__X, __Y),
1428                                             (__v8di)_mm512_setzero_si512 ());
1429}
1430
1431static __inline __m512i __DEFAULT_FN_ATTRS512
1432_mm512_mul_epu32(__m512i __X, __m512i __Y)
1433{
1434  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1435}
1436
1437static __inline __m512i __DEFAULT_FN_ATTRS512
1438_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1439{
1440  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1441                                             (__v8di)_mm512_mul_epu32(__X, __Y),
1442                                             (__v8di)__W);
1443}
1444
1445static __inline __m512i __DEFAULT_FN_ATTRS512
1446_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
1447{
1448  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1449                                             (__v8di)_mm512_mul_epu32(__X, __Y),
1450                                             (__v8di)_mm512_setzero_si512 ());
1451}
1452
1453static __inline __m512i __DEFAULT_FN_ATTRS512
1454_mm512_mullo_epi32 (__m512i __A, __m512i __B)
1455{
1456  return (__m512i) ((__v16su) __A * (__v16su) __B);
1457}
1458
1459static __inline __m512i __DEFAULT_FN_ATTRS512
1460_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
1461{
1462  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1463                                             (__v16si)_mm512_mullo_epi32(__A, __B),
1464                                             (__v16si)_mm512_setzero_si512());
1465}
1466
1467static __inline __m512i __DEFAULT_FN_ATTRS512
1468_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1469{
1470  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1471                                             (__v16si)_mm512_mullo_epi32(__A, __B),
1472                                             (__v16si)__W);
1473}
1474
1475static __inline__ __m512i __DEFAULT_FN_ATTRS512
1476_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
1477  return (__m512i) ((__v8du) __A * (__v8du) __B);
1478}
1479
1480static __inline__ __m512i __DEFAULT_FN_ATTRS512
1481_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1482  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1483                                             (__v8di)_mm512_mullox_epi64(__A, __B),
1484                                             (__v8di)__W);
1485}
1486
1487#define _mm512_sqrt_round_pd(A, R) \
1488  (__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R))
1489
1490#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
1491  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1492                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1493                                       (__v8df)(__m512d)(W))
1494
1495#define _mm512_maskz_sqrt_round_pd(U, A, R) \
1496  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1497                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1498                                       (__v8df)_mm512_setzero_pd())
1499
1500static  __inline__ __m512d __DEFAULT_FN_ATTRS512
1501_mm512_sqrt_pd(__m512d __A)
1502{
1503  return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1504                                           _MM_FROUND_CUR_DIRECTION);
1505}
1506
1507static __inline__ __m512d __DEFAULT_FN_ATTRS512
1508_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1509{
1510  return (__m512d)__builtin_ia32_selectpd_512(__U,
1511                                              (__v8df)_mm512_sqrt_pd(__A),
1512                                              (__v8df)__W);
1513}
1514
1515static __inline__ __m512d __DEFAULT_FN_ATTRS512
1516_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1517{
1518  return (__m512d)__builtin_ia32_selectpd_512(__U,
1519                                              (__v8df)_mm512_sqrt_pd(__A),
1520                                              (__v8df)_mm512_setzero_pd());
1521}
1522
1523#define _mm512_sqrt_round_ps(A, R) \
1524  (__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R))
1525
1526#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
1527  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1528                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1529                                      (__v16sf)(__m512)(W))
1530
1531#define _mm512_maskz_sqrt_round_ps(U, A, R) \
1532  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1533                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1534                                      (__v16sf)_mm512_setzero_ps())
1535
1536static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1537_mm512_sqrt_ps(__m512 __A)
1538{
1539  return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1540                                          _MM_FROUND_CUR_DIRECTION);
1541}
1542
1543static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1544_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1545{
1546  return (__m512)__builtin_ia32_selectps_512(__U,
1547                                             (__v16sf)_mm512_sqrt_ps(__A),
1548                                             (__v16sf)__W);
1549}
1550
1551static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1552_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1553{
1554  return (__m512)__builtin_ia32_selectps_512(__U,
1555                                             (__v16sf)_mm512_sqrt_ps(__A),
1556                                             (__v16sf)_mm512_setzero_ps());
1557}
1558
1559static  __inline__ __m512d __DEFAULT_FN_ATTRS512
1560_mm512_rsqrt14_pd(__m512d __A)
1561{
1562  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1563                 (__v8df)
1564                 _mm512_setzero_pd (),
1565                 (__mmask8) -1);}
1566
1567static __inline__ __m512d __DEFAULT_FN_ATTRS512
1568_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1569{
1570  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1571                  (__v8df) __W,
1572                  (__mmask8) __U);
1573}
1574
1575static __inline__ __m512d __DEFAULT_FN_ATTRS512
1576_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1577{
1578  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1579                  (__v8df)
1580                  _mm512_setzero_pd (),
1581                  (__mmask8) __U);
1582}
1583
1584static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1585_mm512_rsqrt14_ps(__m512 __A)
1586{
1587  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1588                (__v16sf)
1589                _mm512_setzero_ps (),
1590                (__mmask16) -1);
1591}
1592
1593static __inline__ __m512 __DEFAULT_FN_ATTRS512
1594_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1595{
1596  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1597                 (__v16sf) __W,
1598                 (__mmask16) __U);
1599}
1600
1601static __inline__ __m512 __DEFAULT_FN_ATTRS512
1602_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1603{
1604  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1605                 (__v16sf)
1606                 _mm512_setzero_ps (),
1607                 (__mmask16) __U);
1608}
1609
1610static  __inline__ __m128 __DEFAULT_FN_ATTRS128
1611_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1612{
1613  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1614             (__v4sf) __B,
1615             (__v4sf)
1616             _mm_setzero_ps (),
1617             (__mmask8) -1);
1618}
1619
1620static __inline__ __m128 __DEFAULT_FN_ATTRS128
1621_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1622{
1623 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1624          (__v4sf) __B,
1625          (__v4sf) __W,
1626          (__mmask8) __U);
1627}
1628
1629static __inline__ __m128 __DEFAULT_FN_ATTRS128
1630_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1631{
1632 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1633          (__v4sf) __B,
1634          (__v4sf) _mm_setzero_ps (),
1635          (__mmask8) __U);
1636}
1637
1638static  __inline__ __m128d __DEFAULT_FN_ATTRS128
1639_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1640{
1641  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1642              (__v2df) __B,
1643              (__v2df)
1644              _mm_setzero_pd (),
1645              (__mmask8) -1);
1646}
1647
1648static __inline__ __m128d __DEFAULT_FN_ATTRS128
1649_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1650{
1651 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1652          (__v2df) __B,
1653          (__v2df) __W,
1654          (__mmask8) __U);
1655}
1656
1657static __inline__ __m128d __DEFAULT_FN_ATTRS128
1658_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1659{
1660 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1661          (__v2df) __B,
1662          (__v2df) _mm_setzero_pd (),
1663          (__mmask8) __U);
1664}
1665
1666static  __inline__ __m512d __DEFAULT_FN_ATTRS512
1667_mm512_rcp14_pd(__m512d __A)
1668{
1669  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1670               (__v8df)
1671               _mm512_setzero_pd (),
1672               (__mmask8) -1);
1673}
1674
1675static __inline__ __m512d __DEFAULT_FN_ATTRS512
1676_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1677{
1678  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1679                (__v8df) __W,
1680                (__mmask8) __U);
1681}
1682
1683static __inline__ __m512d __DEFAULT_FN_ATTRS512
1684_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1685{
1686  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1687                (__v8df)
1688                _mm512_setzero_pd (),
1689                (__mmask8) __U);
1690}
1691
1692static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1693_mm512_rcp14_ps(__m512 __A)
1694{
1695  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1696              (__v16sf)
1697              _mm512_setzero_ps (),
1698              (__mmask16) -1);
1699}
1700
1701static __inline__ __m512 __DEFAULT_FN_ATTRS512
1702_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1703{
1704  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1705                   (__v16sf) __W,
1706                   (__mmask16) __U);
1707}
1708
1709static __inline__ __m512 __DEFAULT_FN_ATTRS512
1710_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1711{
1712  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1713                   (__v16sf)
1714                   _mm512_setzero_ps (),
1715                   (__mmask16) __U);
1716}
1717
1718static  __inline__ __m128 __DEFAULT_FN_ATTRS128
1719_mm_rcp14_ss(__m128 __A, __m128 __B)
1720{
1721  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1722                 (__v4sf) __B,
1723                 (__v4sf)
1724                 _mm_setzero_ps (),
1725                 (__mmask8) -1);
1726}
1727
1728static __inline__ __m128 __DEFAULT_FN_ATTRS128
1729_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1730{
1731 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1732          (__v4sf) __B,
1733          (__v4sf) __W,
1734          (__mmask8) __U);
1735}
1736
1737static __inline__ __m128 __DEFAULT_FN_ATTRS128
1738_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1739{
1740 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1741          (__v4sf) __B,
1742          (__v4sf) _mm_setzero_ps (),
1743          (__mmask8) __U);
1744}
1745
1746static  __inline__ __m128d __DEFAULT_FN_ATTRS128
1747_mm_rcp14_sd(__m128d __A, __m128d __B)
1748{
1749  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1750            (__v2df) __B,
1751            (__v2df)
1752            _mm_setzero_pd (),
1753            (__mmask8) -1);
1754}
1755
1756static __inline__ __m128d __DEFAULT_FN_ATTRS128
1757_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1758{
1759 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1760          (__v2df) __B,
1761          (__v2df) __W,
1762          (__mmask8) __U);
1763}
1764
1765static __inline__ __m128d __DEFAULT_FN_ATTRS128
1766_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1767{
1768 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1769          (__v2df) __B,
1770          (__v2df) _mm_setzero_pd (),
1771          (__mmask8) __U);
1772}
1773
1774static __inline __m512 __DEFAULT_FN_ATTRS512
1775_mm512_floor_ps(__m512 __A)
1776{
1777  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1778                                                  _MM_FROUND_FLOOR,
1779                                                  (__v16sf) __A, -1,
1780                                                  _MM_FROUND_CUR_DIRECTION);
1781}
1782
1783static __inline__ __m512 __DEFAULT_FN_ATTRS512
1784_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1785{
1786  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1787                   _MM_FROUND_FLOOR,
1788                   (__v16sf) __W, __U,
1789                   _MM_FROUND_CUR_DIRECTION);
1790}
1791
1792static __inline __m512d __DEFAULT_FN_ATTRS512
1793_mm512_floor_pd(__m512d __A)
1794{
1795  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1796                                                   _MM_FROUND_FLOOR,
1797                                                   (__v8df) __A, -1,
1798                                                   _MM_FROUND_CUR_DIRECTION);
1799}
1800
1801static __inline__ __m512d __DEFAULT_FN_ATTRS512
1802_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1803{
1804  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1805                _MM_FROUND_FLOOR,
1806                (__v8df) __W, __U,
1807                _MM_FROUND_CUR_DIRECTION);
1808}
1809
1810static __inline__ __m512 __DEFAULT_FN_ATTRS512
1811_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1812{
1813  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1814                   _MM_FROUND_CEIL,
1815                   (__v16sf) __W, __U,
1816                   _MM_FROUND_CUR_DIRECTION);
1817}
1818
1819static __inline __m512 __DEFAULT_FN_ATTRS512
1820_mm512_ceil_ps(__m512 __A)
1821{
1822  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1823                                                  _MM_FROUND_CEIL,
1824                                                  (__v16sf) __A, -1,
1825                                                  _MM_FROUND_CUR_DIRECTION);
1826}
1827
1828static __inline __m512d __DEFAULT_FN_ATTRS512
1829_mm512_ceil_pd(__m512d __A)
1830{
1831  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1832                                                   _MM_FROUND_CEIL,
1833                                                   (__v8df) __A, -1,
1834                                                   _MM_FROUND_CUR_DIRECTION);
1835}
1836
1837static __inline__ __m512d __DEFAULT_FN_ATTRS512
1838_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1839{
1840  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1841                _MM_FROUND_CEIL,
1842                (__v8df) __W, __U,
1843                _MM_FROUND_CUR_DIRECTION);
1844}
1845
1846static __inline __m512i __DEFAULT_FN_ATTRS512
1847_mm512_abs_epi64(__m512i __A)
1848{
1849  return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
1850}
1851
1852static __inline__ __m512i __DEFAULT_FN_ATTRS512
1853_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1854{
1855  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1856                                             (__v8di)_mm512_abs_epi64(__A),
1857                                             (__v8di)__W);
1858}
1859
1860static __inline__ __m512i __DEFAULT_FN_ATTRS512
1861_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1862{
1863  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1864                                             (__v8di)_mm512_abs_epi64(__A),
1865                                             (__v8di)_mm512_setzero_si512());
1866}
1867
1868static __inline __m512i __DEFAULT_FN_ATTRS512
1869_mm512_abs_epi32(__m512i __A)
1870{
1871  return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
1872}
1873
1874static __inline__ __m512i __DEFAULT_FN_ATTRS512
1875_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1876{
1877  return (__m512i)__builtin_ia32_selectd_512(__U,
1878                                             (__v16si)_mm512_abs_epi32(__A),
1879                                             (__v16si)__W);
1880}
1881
1882static __inline__ __m512i __DEFAULT_FN_ATTRS512
1883_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
1884{
1885  return (__m512i)__builtin_ia32_selectd_512(__U,
1886                                             (__v16si)_mm512_abs_epi32(__A),
1887                                             (__v16si)_mm512_setzero_si512());
1888}
1889
1890static __inline__ __m128 __DEFAULT_FN_ATTRS128
1891_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1892  __A = _mm_add_ss(__A, __B);
1893  return __builtin_ia32_selectss_128(__U, __A, __W);
1894}
1895
1896static __inline__ __m128 __DEFAULT_FN_ATTRS128
1897_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1898  __A = _mm_add_ss(__A, __B);
1899  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1900}
1901
1902#define _mm_add_round_ss(A, B, R) \
1903  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1904                                          (__v4sf)(__m128)(B), \
1905                                          (__v4sf)_mm_setzero_ps(), \
1906                                          (__mmask8)-1, (int)(R))
1907
1908#define _mm_mask_add_round_ss(W, U, A, B, R) \
1909  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1910                                          (__v4sf)(__m128)(B), \
1911                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
1912                                          (int)(R))
1913
1914#define _mm_maskz_add_round_ss(U, A, B, R) \
1915  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1916                                          (__v4sf)(__m128)(B), \
1917                                          (__v4sf)_mm_setzero_ps(), \
1918                                          (__mmask8)(U), (int)(R))
1919
1920static __inline__ __m128d __DEFAULT_FN_ATTRS128
1921_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1922  __A = _mm_add_sd(__A, __B);
1923  return __builtin_ia32_selectsd_128(__U, __A, __W);
1924}
1925
1926static __inline__ __m128d __DEFAULT_FN_ATTRS128
1927_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1928  __A = _mm_add_sd(__A, __B);
1929  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1930}
1931#define _mm_add_round_sd(A, B, R) \
1932  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1933                                           (__v2df)(__m128d)(B), \
1934                                           (__v2df)_mm_setzero_pd(), \
1935                                           (__mmask8)-1, (int)(R))
1936
1937#define _mm_mask_add_round_sd(W, U, A, B, R) \
1938  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1939                                           (__v2df)(__m128d)(B), \
1940                                           (__v2df)(__m128d)(W), \
1941                                           (__mmask8)(U), (int)(R))
1942
1943#define _mm_maskz_add_round_sd(U, A, B, R) \
1944  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1945                                           (__v2df)(__m128d)(B), \
1946                                           (__v2df)_mm_setzero_pd(), \
1947                                           (__mmask8)(U), (int)(R))
1948
1949static __inline__ __m512d __DEFAULT_FN_ATTRS512
1950_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1951  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1952                                              (__v8df)_mm512_add_pd(__A, __B),
1953                                              (__v8df)__W);
1954}
1955
1956static __inline__ __m512d __DEFAULT_FN_ATTRS512
1957_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1958  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1959                                              (__v8df)_mm512_add_pd(__A, __B),
1960                                              (__v8df)_mm512_setzero_pd());
1961}
1962
1963static __inline__ __m512 __DEFAULT_FN_ATTRS512
1964_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1965  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1966                                             (__v16sf)_mm512_add_ps(__A, __B),
1967                                             (__v16sf)__W);
1968}
1969
1970static __inline__ __m512 __DEFAULT_FN_ATTRS512
1971_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1972  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1973                                             (__v16sf)_mm512_add_ps(__A, __B),
1974                                             (__v16sf)_mm512_setzero_ps());
1975}
1976
1977#define _mm512_add_round_pd(A, B, R) \
1978  (__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
1979                                   (__v8df)(__m512d)(B), (int)(R))
1980
1981#define _mm512_mask_add_round_pd(W, U, A, B, R) \
1982  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1983                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1984                                   (__v8df)(__m512d)(W))
1985
1986#define _mm512_maskz_add_round_pd(U, A, B, R) \
1987  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1988                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1989                                   (__v8df)_mm512_setzero_pd())
1990
1991#define _mm512_add_round_ps(A, B, R) \
1992  (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
1993                                  (__v16sf)(__m512)(B), (int)(R))
1994
1995#define _mm512_mask_add_round_ps(W, U, A, B, R) \
1996  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1997                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
1998                                  (__v16sf)(__m512)(W))
1999
2000#define _mm512_maskz_add_round_ps(U, A, B, R) \
2001  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2002                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
2003                                  (__v16sf)_mm512_setzero_ps())
2004
2005static __inline__ __m128 __DEFAULT_FN_ATTRS128
2006_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2007  __A = _mm_sub_ss(__A, __B);
2008  return __builtin_ia32_selectss_128(__U, __A, __W);
2009}
2010
2011static __inline__ __m128 __DEFAULT_FN_ATTRS128
2012_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2013  __A = _mm_sub_ss(__A, __B);
2014  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2015}
2016#define _mm_sub_round_ss(A, B, R) \
2017  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2018                                          (__v4sf)(__m128)(B), \
2019                                          (__v4sf)_mm_setzero_ps(), \
2020                                          (__mmask8)-1, (int)(R))
2021
2022#define _mm_mask_sub_round_ss(W, U, A, B, R) \
2023  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2024                                          (__v4sf)(__m128)(B), \
2025                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
2026                                          (int)(R))
2027
2028#define _mm_maskz_sub_round_ss(U, A, B, R) \
2029  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2030                                          (__v4sf)(__m128)(B), \
2031                                          (__v4sf)_mm_setzero_ps(), \
2032                                          (__mmask8)(U), (int)(R))
2033
2034static __inline__ __m128d __DEFAULT_FN_ATTRS128
2035_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2036  __A = _mm_sub_sd(__A, __B);
2037  return __builtin_ia32_selectsd_128(__U, __A, __W);
2038}
2039
2040static __inline__ __m128d __DEFAULT_FN_ATTRS128
2041_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2042  __A = _mm_sub_sd(__A, __B);
2043  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2044}
2045
2046#define _mm_sub_round_sd(A, B, R) \
2047  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2048                                           (__v2df)(__m128d)(B), \
2049                                           (__v2df)_mm_setzero_pd(), \
2050                                           (__mmask8)-1, (int)(R))
2051
2052#define _mm_mask_sub_round_sd(W, U, A, B, R) \
2053  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2054                                           (__v2df)(__m128d)(B), \
2055                                           (__v2df)(__m128d)(W), \
2056                                           (__mmask8)(U), (int)(R))
2057
2058#define _mm_maskz_sub_round_sd(U, A, B, R) \
2059  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2060                                           (__v2df)(__m128d)(B), \
2061                                           (__v2df)_mm_setzero_pd(), \
2062                                           (__mmask8)(U), (int)(R))
2063
2064static __inline__ __m512d __DEFAULT_FN_ATTRS512
2065_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2066  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2067                                              (__v8df)_mm512_sub_pd(__A, __B),
2068                                              (__v8df)__W);
2069}
2070
2071static __inline__ __m512d __DEFAULT_FN_ATTRS512
2072_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2073  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2074                                              (__v8df)_mm512_sub_pd(__A, __B),
2075                                              (__v8df)_mm512_setzero_pd());
2076}
2077
2078static __inline__ __m512 __DEFAULT_FN_ATTRS512
2079_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2080  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2081                                             (__v16sf)_mm512_sub_ps(__A, __B),
2082                                             (__v16sf)__W);
2083}
2084
2085static __inline__ __m512 __DEFAULT_FN_ATTRS512
2086_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2087  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2088                                             (__v16sf)_mm512_sub_ps(__A, __B),
2089                                             (__v16sf)_mm512_setzero_ps());
2090}
2091
2092#define _mm512_sub_round_pd(A, B, R) \
2093  (__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
2094                                   (__v8df)(__m512d)(B), (int)(R))
2095
2096#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
2097  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2098                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
2099                                   (__v8df)(__m512d)(W))
2100
2101#define _mm512_maskz_sub_round_pd(U, A, B, R) \
2102  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2103                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
2104                                   (__v8df)_mm512_setzero_pd())
2105
2106#define _mm512_sub_round_ps(A, B, R) \
2107  (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
2108                                  (__v16sf)(__m512)(B), (int)(R))
2109
2110#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
2111  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2112                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2113                                  (__v16sf)(__m512)(W))
2114
2115#define _mm512_maskz_sub_round_ps(U, A, B, R) \
2116  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2117                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2118                                  (__v16sf)_mm512_setzero_ps())
2119
2120static __inline__ __m128 __DEFAULT_FN_ATTRS128
2121_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2122  __A = _mm_mul_ss(__A, __B);
2123  return __builtin_ia32_selectss_128(__U, __A, __W);
2124}
2125
2126static __inline__ __m128 __DEFAULT_FN_ATTRS128
2127_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2128  __A = _mm_mul_ss(__A, __B);
2129  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2130}
2131#define _mm_mul_round_ss(A, B, R) \
2132  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2133                                          (__v4sf)(__m128)(B), \
2134                                          (__v4sf)_mm_setzero_ps(), \
2135                                          (__mmask8)-1, (int)(R))
2136
2137#define _mm_mask_mul_round_ss(W, U, A, B, R) \
2138  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2139                                          (__v4sf)(__m128)(B), \
2140                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
2141                                          (int)(R))
2142
2143#define _mm_maskz_mul_round_ss(U, A, B, R) \
2144  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2145                                          (__v4sf)(__m128)(B), \
2146                                          (__v4sf)_mm_setzero_ps(), \
2147                                          (__mmask8)(U), (int)(R))
2148
2149static __inline__ __m128d __DEFAULT_FN_ATTRS128
2150_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2151  __A = _mm_mul_sd(__A, __B);
2152  return __builtin_ia32_selectsd_128(__U, __A, __W);
2153}
2154
2155static __inline__ __m128d __DEFAULT_FN_ATTRS128
2156_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2157  __A = _mm_mul_sd(__A, __B);
2158  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2159}
2160
2161#define _mm_mul_round_sd(A, B, R) \
2162  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2163                                           (__v2df)(__m128d)(B), \
2164                                           (__v2df)_mm_setzero_pd(), \
2165                                           (__mmask8)-1, (int)(R))
2166
2167#define _mm_mask_mul_round_sd(W, U, A, B, R) \
2168  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2169                                           (__v2df)(__m128d)(B), \
2170                                           (__v2df)(__m128d)(W), \
2171                                           (__mmask8)(U), (int)(R))
2172
2173#define _mm_maskz_mul_round_sd(U, A, B, R) \
2174  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2175                                           (__v2df)(__m128d)(B), \
2176                                           (__v2df)_mm_setzero_pd(), \
2177                                           (__mmask8)(U), (int)(R))
2178
2179static __inline__ __m512d __DEFAULT_FN_ATTRS512
2180_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2181  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2182                                              (__v8df)_mm512_mul_pd(__A, __B),
2183                                              (__v8df)__W);
2184}
2185
2186static __inline__ __m512d __DEFAULT_FN_ATTRS512
2187_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2188  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2189                                              (__v8df)_mm512_mul_pd(__A, __B),
2190                                              (__v8df)_mm512_setzero_pd());
2191}
2192
2193static __inline__ __m512 __DEFAULT_FN_ATTRS512
2194_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2195  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2196                                             (__v16sf)_mm512_mul_ps(__A, __B),
2197                                             (__v16sf)__W);
2198}
2199
2200static __inline__ __m512 __DEFAULT_FN_ATTRS512
2201_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2202  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2203                                             (__v16sf)_mm512_mul_ps(__A, __B),
2204                                             (__v16sf)_mm512_setzero_ps());
2205}
2206
2207#define _mm512_mul_round_pd(A, B, R) \
2208  (__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
2209                                   (__v8df)(__m512d)(B), (int)(R))
2210
2211#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
2212  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2213                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
2214                                   (__v8df)(__m512d)(W))
2215
2216#define _mm512_maskz_mul_round_pd(U, A, B, R) \
2217  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2218                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
2219                                   (__v8df)_mm512_setzero_pd())
2220
2221#define _mm512_mul_round_ps(A, B, R) \
2222  (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
2223                                  (__v16sf)(__m512)(B), (int)(R))
2224
2225#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
2226  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2227                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
2228                                  (__v16sf)(__m512)(W))
2229
2230#define _mm512_maskz_mul_round_ps(U, A, B, R) \
2231  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2232                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
2233                                  (__v16sf)_mm512_setzero_ps())
2234
2235static __inline__ __m128 __DEFAULT_FN_ATTRS128
2236_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2237  __A = _mm_div_ss(__A, __B);
2238  return __builtin_ia32_selectss_128(__U, __A, __W);
2239}
2240
2241static __inline__ __m128 __DEFAULT_FN_ATTRS128
2242_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2243  __A = _mm_div_ss(__A, __B);
2244  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2245}
2246
2247#define _mm_div_round_ss(A, B, R) \
2248  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2249                                          (__v4sf)(__m128)(B), \
2250                                          (__v4sf)_mm_setzero_ps(), \
2251                                          (__mmask8)-1, (int)(R))
2252
2253#define _mm_mask_div_round_ss(W, U, A, B, R) \
2254  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2255                                          (__v4sf)(__m128)(B), \
2256                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
2257                                          (int)(R))
2258
2259#define _mm_maskz_div_round_ss(U, A, B, R) \
2260  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2261                                          (__v4sf)(__m128)(B), \
2262                                          (__v4sf)_mm_setzero_ps(), \
2263                                          (__mmask8)(U), (int)(R))
2264
2265static __inline__ __m128d __DEFAULT_FN_ATTRS128
2266_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2267  __A = _mm_div_sd(__A, __B);
2268  return __builtin_ia32_selectsd_128(__U, __A, __W);
2269}
2270
2271static __inline__ __m128d __DEFAULT_FN_ATTRS128
2272_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2273  __A = _mm_div_sd(__A, __B);
2274  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2275}
2276
2277#define _mm_div_round_sd(A, B, R) \
2278  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2279                                           (__v2df)(__m128d)(B), \
2280                                           (__v2df)_mm_setzero_pd(), \
2281                                           (__mmask8)-1, (int)(R))
2282
2283#define _mm_mask_div_round_sd(W, U, A, B, R) \
2284  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2285                                           (__v2df)(__m128d)(B), \
2286                                           (__v2df)(__m128d)(W), \
2287                                           (__mmask8)(U), (int)(R))
2288
2289#define _mm_maskz_div_round_sd(U, A, B, R) \
2290  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2291                                           (__v2df)(__m128d)(B), \
2292                                           (__v2df)_mm_setzero_pd(), \
2293                                           (__mmask8)(U), (int)(R))
2294
2295static __inline __m512d __DEFAULT_FN_ATTRS512
2296_mm512_div_pd(__m512d __a, __m512d __b)
2297{
2298  return (__m512d)((__v8df)__a/(__v8df)__b);
2299}
2300
2301static __inline__ __m512d __DEFAULT_FN_ATTRS512
2302_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2303  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2304                                              (__v8df)_mm512_div_pd(__A, __B),
2305                                              (__v8df)__W);
2306}
2307
2308static __inline__ __m512d __DEFAULT_FN_ATTRS512
2309_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2310  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2311                                              (__v8df)_mm512_div_pd(__A, __B),
2312                                              (__v8df)_mm512_setzero_pd());
2313}
2314
2315static __inline __m512 __DEFAULT_FN_ATTRS512
2316_mm512_div_ps(__m512 __a, __m512 __b)
2317{
2318  return (__m512)((__v16sf)__a/(__v16sf)__b);
2319}
2320
2321static __inline__ __m512 __DEFAULT_FN_ATTRS512
2322_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2323  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2324                                             (__v16sf)_mm512_div_ps(__A, __B),
2325                                             (__v16sf)__W);
2326}
2327
2328static __inline__ __m512 __DEFAULT_FN_ATTRS512
2329_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2330  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2331                                             (__v16sf)_mm512_div_ps(__A, __B),
2332                                             (__v16sf)_mm512_setzero_ps());
2333}
2334
2335#define _mm512_div_round_pd(A, B, R) \
2336  (__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
2337                                   (__v8df)(__m512d)(B), (int)(R))
2338
2339#define _mm512_mask_div_round_pd(W, U, A, B, R) \
2340  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2341                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
2342                                   (__v8df)(__m512d)(W))
2343
2344#define _mm512_maskz_div_round_pd(U, A, B, R) \
2345  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2346                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
2347                                   (__v8df)_mm512_setzero_pd())
2348
2349#define _mm512_div_round_ps(A, B, R) \
2350  (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
2351                                  (__v16sf)(__m512)(B), (int)(R))
2352
2353#define _mm512_mask_div_round_ps(W, U, A, B, R) \
2354  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2355                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
2356                                  (__v16sf)(__m512)(W))
2357
2358#define _mm512_maskz_div_round_ps(U, A, B, R) \
2359  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2360                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
2361                                  (__v16sf)_mm512_setzero_ps())
2362
2363#define _mm512_roundscale_ps(A, B) \
2364  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
2365                                         (__v16sf)_mm512_undefined_ps(), \
2366                                         (__mmask16)-1, \
2367                                         _MM_FROUND_CUR_DIRECTION)
2368
2369#define _mm512_mask_roundscale_ps(A, B, C, imm) \
2370  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
2371                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
2372                                         _MM_FROUND_CUR_DIRECTION)
2373
2374#define _mm512_maskz_roundscale_ps(A, B, imm) \
2375  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
2376                                         (__v16sf)_mm512_setzero_ps(), \
2377                                         (__mmask16)(A), \
2378                                         _MM_FROUND_CUR_DIRECTION)
2379
2380#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
2381  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
2382                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
2383                                         (int)(R))
2384
2385#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
2386  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
2387                                         (__v16sf)_mm512_setzero_ps(), \
2388                                         (__mmask16)(A), (int)(R))
2389
2390#define _mm512_roundscale_round_ps(A, imm, R) \
2391  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
2392                                         (__v16sf)_mm512_undefined_ps(), \
2393                                         (__mmask16)-1, (int)(R))
2394
2395#define _mm512_roundscale_pd(A, B) \
2396  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
2397                                          (__v8df)_mm512_undefined_pd(), \
2398                                          (__mmask8)-1, \
2399                                          _MM_FROUND_CUR_DIRECTION)
2400
2401#define _mm512_mask_roundscale_pd(A, B, C, imm) \
2402  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
2403                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
2404                                          _MM_FROUND_CUR_DIRECTION)
2405
2406#define _mm512_maskz_roundscale_pd(A, B, imm) \
2407  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
2408                                          (__v8df)_mm512_setzero_pd(), \
2409                                          (__mmask8)(A), \
2410                                          _MM_FROUND_CUR_DIRECTION)
2411
2412#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
2413  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
2414                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
2415                                          (int)(R))
2416
2417#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
2418  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
2419                                          (__v8df)_mm512_setzero_pd(), \
2420                                          (__mmask8)(A), (int)(R))
2421
2422#define _mm512_roundscale_round_pd(A, imm, R) \
2423  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
2424                                          (__v8df)_mm512_undefined_pd(), \
2425                                          (__mmask8)-1, (int)(R))
2426
2427#define _mm512_fmadd_round_pd(A, B, C, R) \
2428  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2429                                           (__v8df)(__m512d)(B), \
2430                                           (__v8df)(__m512d)(C), \
2431                                           (__mmask8)-1, (int)(R))
2432
2433
2434#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
2435  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2436                                           (__v8df)(__m512d)(B), \
2437                                           (__v8df)(__m512d)(C), \
2438                                           (__mmask8)(U), (int)(R))
2439
2440
2441#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
2442  (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
2443                                            (__v8df)(__m512d)(B), \
2444                                            (__v8df)(__m512d)(C), \
2445                                            (__mmask8)(U), (int)(R))
2446
2447
2448#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
2449  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
2450                                            (__v8df)(__m512d)(B), \
2451                                            (__v8df)(__m512d)(C), \
2452                                            (__mmask8)(U), (int)(R))
2453
2454
2455#define _mm512_fmsub_round_pd(A, B, C, R) \
2456  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2457                                           (__v8df)(__m512d)(B), \
2458                                           -(__v8df)(__m512d)(C), \
2459                                           (__mmask8)-1, (int)(R))
2460
2461
2462#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
2463  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2464                                           (__v8df)(__m512d)(B), \
2465                                           -(__v8df)(__m512d)(C), \
2466                                           (__mmask8)(U), (int)(R))
2467
2468
2469#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
2470  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
2471                                            (__v8df)(__m512d)(B), \
2472                                            -(__v8df)(__m512d)(C), \
2473                                            (__mmask8)(U), (int)(R))
2474
2475
2476#define _mm512_fnmadd_round_pd(A, B, C, R) \
2477  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
2478                                           (__v8df)(__m512d)(B), \
2479                                           (__v8df)(__m512d)(C), \
2480                                           (__mmask8)-1, (int)(R))
2481
2482
2483#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
2484  (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
2485                                            (__v8df)(__m512d)(B), \
2486                                            (__v8df)(__m512d)(C), \
2487                                            (__mmask8)(U), (int)(R))
2488
2489
2490#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
2491  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
2492                                            (__v8df)(__m512d)(B), \
2493                                            (__v8df)(__m512d)(C), \
2494                                            (__mmask8)(U), (int)(R))
2495
2496
2497#define _mm512_fnmsub_round_pd(A, B, C, R) \
2498  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
2499                                           (__v8df)(__m512d)(B), \
2500                                           -(__v8df)(__m512d)(C), \
2501                                           (__mmask8)-1, (int)(R))
2502
2503
2504#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
2505  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
2506                                            (__v8df)(__m512d)(B), \
2507                                            -(__v8df)(__m512d)(C), \
2508                                            (__mmask8)(U), (int)(R))
2509
2510
2511static __inline__ __m512d __DEFAULT_FN_ATTRS512
2512_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2513{
2514  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2515                                                    (__v8df) __B,
2516                                                    (__v8df) __C,
2517                                                    (__mmask8) -1,
2518                                                    _MM_FROUND_CUR_DIRECTION);
2519}
2520
2521static __inline__ __m512d __DEFAULT_FN_ATTRS512
2522_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2523{
2524  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2525                                                    (__v8df) __B,
2526                                                    (__v8df) __C,
2527                                                    (__mmask8) __U,
2528                                                    _MM_FROUND_CUR_DIRECTION);
2529}
2530
2531static __inline__ __m512d __DEFAULT_FN_ATTRS512
2532_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2533{
2534  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2535                                                     (__v8df) __B,
2536                                                     (__v8df) __C,
2537                                                     (__mmask8) __U,
2538                                                     _MM_FROUND_CUR_DIRECTION);
2539}
2540
2541static __inline__ __m512d __DEFAULT_FN_ATTRS512
2542_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2543{
2544  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2545                                                     (__v8df) __B,
2546                                                     (__v8df) __C,
2547                                                     (__mmask8) __U,
2548                                                     _MM_FROUND_CUR_DIRECTION);
2549}
2550
2551static __inline__ __m512d __DEFAULT_FN_ATTRS512
2552_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2553{
2554  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2555                                                    (__v8df) __B,
2556                                                    -(__v8df) __C,
2557                                                    (__mmask8) -1,
2558                                                    _MM_FROUND_CUR_DIRECTION);
2559}
2560
2561static __inline__ __m512d __DEFAULT_FN_ATTRS512
2562_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2563{
2564  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2565                                                    (__v8df) __B,
2566                                                    -(__v8df) __C,
2567                                                    (__mmask8) __U,
2568                                                    _MM_FROUND_CUR_DIRECTION);
2569}
2570
2571static __inline__ __m512d __DEFAULT_FN_ATTRS512
2572_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2573{
2574  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2575                                                     (__v8df) __B,
2576                                                     -(__v8df) __C,
2577                                                     (__mmask8) __U,
2578                                                     _MM_FROUND_CUR_DIRECTION);
2579}
2580
2581static __inline__ __m512d __DEFAULT_FN_ATTRS512
2582_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2583{
2584  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2585                                                    -(__v8df) __B,
2586                                                    (__v8df) __C,
2587                                                    (__mmask8) -1,
2588                                                    _MM_FROUND_CUR_DIRECTION);
2589}
2590
2591static __inline__ __m512d __DEFAULT_FN_ATTRS512
2592_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2593{
2594  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2595                                                     (__v8df) __B,
2596                                                     (__v8df) __C,
2597                                                     (__mmask8) __U,
2598                                                     _MM_FROUND_CUR_DIRECTION);
2599}
2600
2601static __inline__ __m512d __DEFAULT_FN_ATTRS512
2602_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2603{
2604  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2605                                                     (__v8df) __B,
2606                                                     (__v8df) __C,
2607                                                     (__mmask8) __U,
2608                                                     _MM_FROUND_CUR_DIRECTION);
2609}
2610
2611static __inline__ __m512d __DEFAULT_FN_ATTRS512
2612_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2613{
2614  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2615                                                    -(__v8df) __B,
2616                                                    -(__v8df) __C,
2617                                                    (__mmask8) -1,
2618                                                    _MM_FROUND_CUR_DIRECTION);
2619}
2620
2621static __inline__ __m512d __DEFAULT_FN_ATTRS512
2622_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2623{
2624  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2625                                                     (__v8df) __B,
2626                                                     -(__v8df) __C,
2627                                                     (__mmask8) __U,
2628                                                     _MM_FROUND_CUR_DIRECTION);
2629}
2630
2631#define _mm512_fmadd_round_ps(A, B, C, R) \
2632  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2633                                          (__v16sf)(__m512)(B), \
2634                                          (__v16sf)(__m512)(C), \
2635                                          (__mmask16)-1, (int)(R))
2636
2637
2638#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
2639  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2640                                          (__v16sf)(__m512)(B), \
2641                                          (__v16sf)(__m512)(C), \
2642                                          (__mmask16)(U), (int)(R))
2643
2644
2645#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
2646  (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
2647                                           (__v16sf)(__m512)(B), \
2648                                           (__v16sf)(__m512)(C), \
2649                                           (__mmask16)(U), (int)(R))
2650
2651
2652#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
2653  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2654                                           (__v16sf)(__m512)(B), \
2655                                           (__v16sf)(__m512)(C), \
2656                                           (__mmask16)(U), (int)(R))
2657
2658
2659#define _mm512_fmsub_round_ps(A, B, C, R) \
2660  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2661                                          (__v16sf)(__m512)(B), \
2662                                          -(__v16sf)(__m512)(C), \
2663                                          (__mmask16)-1, (int)(R))
2664
2665
2666#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
2667  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2668                                          (__v16sf)(__m512)(B), \
2669                                          -(__v16sf)(__m512)(C), \
2670                                          (__mmask16)(U), (int)(R))
2671
2672
2673#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
2674  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2675                                           (__v16sf)(__m512)(B), \
2676                                           -(__v16sf)(__m512)(C), \
2677                                           (__mmask16)(U), (int)(R))
2678
2679
2680#define _mm512_fnmadd_round_ps(A, B, C, R) \
2681  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2682                                          -(__v16sf)(__m512)(B), \
2683                                          (__v16sf)(__m512)(C), \
2684                                          (__mmask16)-1, (int)(R))
2685
2686
2687#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
2688  (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
2689                                           (__v16sf)(__m512)(B), \
2690                                           (__v16sf)(__m512)(C), \
2691                                           (__mmask16)(U), (int)(R))
2692
2693
2694#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
2695  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2696                                           (__v16sf)(__m512)(B), \
2697                                           (__v16sf)(__m512)(C), \
2698                                           (__mmask16)(U), (int)(R))
2699
2700
2701#define _mm512_fnmsub_round_ps(A, B, C, R) \
2702  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2703                                          -(__v16sf)(__m512)(B), \
2704                                          -(__v16sf)(__m512)(C), \
2705                                          (__mmask16)-1, (int)(R))
2706
2707
2708#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
2709  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2710                                           (__v16sf)(__m512)(B), \
2711                                           -(__v16sf)(__m512)(C), \
2712                                           (__mmask16)(U), (int)(R))
2713
2714
2715static __inline__ __m512 __DEFAULT_FN_ATTRS512
2716_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2717{
2718  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2719                                                   (__v16sf) __B,
2720                                                   (__v16sf) __C,
2721                                                   (__mmask16) -1,
2722                                                   _MM_FROUND_CUR_DIRECTION);
2723}
2724
2725static __inline__ __m512 __DEFAULT_FN_ATTRS512
2726_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2727{
2728  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2729                                                   (__v16sf) __B,
2730                                                   (__v16sf) __C,
2731                                                   (__mmask16) __U,
2732                                                   _MM_FROUND_CUR_DIRECTION);
2733}
2734
2735static __inline__ __m512 __DEFAULT_FN_ATTRS512
2736_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2737{
2738  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2739                                                    (__v16sf) __B,
2740                                                    (__v16sf) __C,
2741                                                    (__mmask16) __U,
2742                                                    _MM_FROUND_CUR_DIRECTION);
2743}
2744
2745static __inline__ __m512 __DEFAULT_FN_ATTRS512
2746_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2747{
2748  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2749                                                    (__v16sf) __B,
2750                                                    (__v16sf) __C,
2751                                                    (__mmask16) __U,
2752                                                    _MM_FROUND_CUR_DIRECTION);
2753}
2754
2755static __inline__ __m512 __DEFAULT_FN_ATTRS512
2756_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2757{
2758  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2759                                                   (__v16sf) __B,
2760                                                   -(__v16sf) __C,
2761                                                   (__mmask16) -1,
2762                                                   _MM_FROUND_CUR_DIRECTION);
2763}
2764
2765static __inline__ __m512 __DEFAULT_FN_ATTRS512
2766_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2767{
2768  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2769                                                   (__v16sf) __B,
2770                                                   -(__v16sf) __C,
2771                                                   (__mmask16) __U,
2772                                                   _MM_FROUND_CUR_DIRECTION);
2773}
2774
2775static __inline__ __m512 __DEFAULT_FN_ATTRS512
2776_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2777{
2778  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2779                                                    (__v16sf) __B,
2780                                                    -(__v16sf) __C,
2781                                                    (__mmask16) __U,
2782                                                    _MM_FROUND_CUR_DIRECTION);
2783}
2784
2785static __inline__ __m512 __DEFAULT_FN_ATTRS512
2786_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2787{
2788  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2789                                                   -(__v16sf) __B,
2790                                                   (__v16sf) __C,
2791                                                   (__mmask16) -1,
2792                                                   _MM_FROUND_CUR_DIRECTION);
2793}
2794
2795static __inline__ __m512 __DEFAULT_FN_ATTRS512
2796_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2797{
2798  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2799                                                    (__v16sf) __B,
2800                                                    (__v16sf) __C,
2801                                                    (__mmask16) __U,
2802                                                    _MM_FROUND_CUR_DIRECTION);
2803}
2804
2805static __inline__ __m512 __DEFAULT_FN_ATTRS512
2806_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2807{
2808  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2809                                                    (__v16sf) __B,
2810                                                    (__v16sf) __C,
2811                                                    (__mmask16) __U,
2812                                                    _MM_FROUND_CUR_DIRECTION);
2813}
2814
2815static __inline__ __m512 __DEFAULT_FN_ATTRS512
2816_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2817{
2818  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2819                                                   -(__v16sf) __B,
2820                                                   -(__v16sf) __C,
2821                                                   (__mmask16) -1,
2822                                                   _MM_FROUND_CUR_DIRECTION);
2823}
2824
2825static __inline__ __m512 __DEFAULT_FN_ATTRS512
2826_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2827{
2828  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2829                                                    (__v16sf) __B,
2830                                                    -(__v16sf) __C,
2831                                                    (__mmask16) __U,
2832                                                    _MM_FROUND_CUR_DIRECTION);
2833}
2834
2835#define _mm512_fmaddsub_round_pd(A, B, C, R) \
2836  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2837                                              (__v8df)(__m512d)(B), \
2838                                              (__v8df)(__m512d)(C), \
2839                                              (__mmask8)-1, (int)(R))
2840
2841
2842#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
2843  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2844                                              (__v8df)(__m512d)(B), \
2845                                              (__v8df)(__m512d)(C), \
2846                                              (__mmask8)(U), (int)(R))
2847
2848
2849#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
2850  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
2851                                               (__v8df)(__m512d)(B), \
2852                                               (__v8df)(__m512d)(C), \
2853                                               (__mmask8)(U), (int)(R))
2854
2855
2856#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
2857  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
2858                                               (__v8df)(__m512d)(B), \
2859                                               (__v8df)(__m512d)(C), \
2860                                               (__mmask8)(U), (int)(R))
2861
2862
2863#define _mm512_fmsubadd_round_pd(A, B, C, R) \
2864  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2865                                              (__v8df)(__m512d)(B), \
2866                                              -(__v8df)(__m512d)(C), \
2867                                              (__mmask8)-1, (int)(R))
2868
2869
2870#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
2871  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2872                                              (__v8df)(__m512d)(B), \
2873                                              -(__v8df)(__m512d)(C), \
2874                                              (__mmask8)(U), (int)(R))
2875
2876
2877#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
2878  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
2879                                               (__v8df)(__m512d)(B), \
2880                                               -(__v8df)(__m512d)(C), \
2881                                               (__mmask8)(U), (int)(R))
2882
2883
2884static __inline__ __m512d __DEFAULT_FN_ATTRS512
2885_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2886{
2887  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2888                                                      (__v8df) __B,
2889                                                      (__v8df) __C,
2890                                                      (__mmask8) -1,
2891                                                      _MM_FROUND_CUR_DIRECTION);
2892}
2893
2894static __inline__ __m512d __DEFAULT_FN_ATTRS512
2895_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2896{
2897  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2898                                                      (__v8df) __B,
2899                                                      (__v8df) __C,
2900                                                      (__mmask8) __U,
2901                                                      _MM_FROUND_CUR_DIRECTION);
2902}
2903
2904static __inline__ __m512d __DEFAULT_FN_ATTRS512
2905_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2906{
2907  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2908                                                       (__v8df) __B,
2909                                                       (__v8df) __C,
2910                                                       (__mmask8) __U,
2911                                                       _MM_FROUND_CUR_DIRECTION);
2912}
2913
2914static __inline__ __m512d __DEFAULT_FN_ATTRS512
2915_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2916{
2917  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2918                                                       (__v8df) __B,
2919                                                       (__v8df) __C,
2920                                                       (__mmask8) __U,
2921                                                       _MM_FROUND_CUR_DIRECTION);
2922}
2923
2924static __inline__ __m512d __DEFAULT_FN_ATTRS512
2925_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2926{
2927  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2928                                                       (__v8df) __B,
2929                                                       -(__v8df) __C,
2930                                                       (__mmask8) -1,
2931                                                       _MM_FROUND_CUR_DIRECTION);
2932}
2933
2934static __inline__ __m512d __DEFAULT_FN_ATTRS512
2935_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2936{
2937  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2938                                                       (__v8df) __B,
2939                                                       -(__v8df) __C,
2940                                                       (__mmask8) __U,
2941                                                       _MM_FROUND_CUR_DIRECTION);
2942}
2943
2944static __inline__ __m512d __DEFAULT_FN_ATTRS512
2945_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2946{
2947  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2948                                                        (__v8df) __B,
2949                                                        -(__v8df) __C,
2950                                                        (__mmask8) __U,
2951                                                        _MM_FROUND_CUR_DIRECTION);
2952}
2953
2954#define _mm512_fmaddsub_round_ps(A, B, C, R) \
2955  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2956                                             (__v16sf)(__m512)(B), \
2957                                             (__v16sf)(__m512)(C), \
2958                                             (__mmask16)-1, (int)(R))
2959
2960
2961#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
2962  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2963                                             (__v16sf)(__m512)(B), \
2964                                             (__v16sf)(__m512)(C), \
2965                                             (__mmask16)(U), (int)(R))
2966
2967
2968#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
2969  (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
2970                                              (__v16sf)(__m512)(B), \
2971                                              (__v16sf)(__m512)(C), \
2972                                              (__mmask16)(U), (int)(R))
2973
2974
2975#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
2976  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
2977                                              (__v16sf)(__m512)(B), \
2978                                              (__v16sf)(__m512)(C), \
2979                                              (__mmask16)(U), (int)(R))
2980
2981
2982#define _mm512_fmsubadd_round_ps(A, B, C, R) \
2983  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2984                                             (__v16sf)(__m512)(B), \
2985                                             -(__v16sf)(__m512)(C), \
2986                                             (__mmask16)-1, (int)(R))
2987
2988
2989#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
2990  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2991                                             (__v16sf)(__m512)(B), \
2992                                             -(__v16sf)(__m512)(C), \
2993                                             (__mmask16)(U), (int)(R))
2994
2995
2996#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
2997  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
2998                                              (__v16sf)(__m512)(B), \
2999                                              -(__v16sf)(__m512)(C), \
3000                                              (__mmask16)(U), (int)(R))
3001
3002
3003static __inline__ __m512 __DEFAULT_FN_ATTRS512
3004_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3005{
3006  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3007                                                      (__v16sf) __B,
3008                                                      (__v16sf) __C,
3009                                                      (__mmask16) -1,
3010                                                      _MM_FROUND_CUR_DIRECTION);
3011}
3012
3013static __inline__ __m512 __DEFAULT_FN_ATTRS512
3014_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3015{
3016  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3017                                                      (__v16sf) __B,
3018                                                      (__v16sf) __C,
3019                                                      (__mmask16) __U,
3020                                                      _MM_FROUND_CUR_DIRECTION);
3021}
3022
3023static __inline__ __m512 __DEFAULT_FN_ATTRS512
3024_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3025{
3026  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3027                                                       (__v16sf) __B,
3028                                                       (__v16sf) __C,
3029                                                       (__mmask16) __U,
3030                                                       _MM_FROUND_CUR_DIRECTION);
3031}
3032
3033static __inline__ __m512 __DEFAULT_FN_ATTRS512
3034_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3035{
3036  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3037                                                       (__v16sf) __B,
3038                                                       (__v16sf) __C,
3039                                                       (__mmask16) __U,
3040                                                       _MM_FROUND_CUR_DIRECTION);
3041}
3042
3043static __inline__ __m512 __DEFAULT_FN_ATTRS512
3044_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3045{
3046  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3047                                                      (__v16sf) __B,
3048                                                      -(__v16sf) __C,
3049                                                      (__mmask16) -1,
3050                                                      _MM_FROUND_CUR_DIRECTION);
3051}
3052
3053static __inline__ __m512 __DEFAULT_FN_ATTRS512
3054_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3055{
3056  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3057                                                      (__v16sf) __B,
3058                                                      -(__v16sf) __C,
3059                                                      (__mmask16) __U,
3060                                                      _MM_FROUND_CUR_DIRECTION);
3061}
3062
3063static __inline__ __m512 __DEFAULT_FN_ATTRS512
3064_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3065{
3066  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3067                                                       (__v16sf) __B,
3068                                                       -(__v16sf) __C,
3069                                                       (__mmask16) __U,
3070                                                       _MM_FROUND_CUR_DIRECTION);
3071}
3072
3073#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3074  (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
3075                                            (__v8df)(__m512d)(B), \
3076                                            (__v8df)(__m512d)(C), \
3077                                            (__mmask8)(U), (int)(R))
3078
3079
3080static __inline__ __m512d __DEFAULT_FN_ATTRS512
3081_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3082{
3083  return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3084                                                    (__v8df) __B,
3085                                                    (__v8df) __C,
3086                                                    (__mmask8) __U,
3087                                                    _MM_FROUND_CUR_DIRECTION);
3088}
3089
3090#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3091  (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
3092                                           (__v16sf)(__m512)(B), \
3093                                           (__v16sf)(__m512)(C), \
3094                                           (__mmask16)(U), (int)(R))
3095
3096static __inline__ __m512 __DEFAULT_FN_ATTRS512
3097_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3098{
3099  return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3100                                                   (__v16sf) __B,
3101                                                   (__v16sf) __C,
3102                                                   (__mmask16) __U,
3103                                                   _MM_FROUND_CUR_DIRECTION);
3104}
3105
3106#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3107  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
3108                                               (__v8df)(__m512d)(B), \
3109                                               (__v8df)(__m512d)(C), \
3110                                               (__mmask8)(U), (int)(R))
3111
3112
3113static __inline__ __m512d __DEFAULT_FN_ATTRS512
3114_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3115{
3116  return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3117                                                       (__v8df) __B,
3118                                                       (__v8df) __C,
3119                                                       (__mmask8) __U,
3120                                                       _MM_FROUND_CUR_DIRECTION);
3121}
3122
3123#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3124  (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
3125                                              (__v16sf)(__m512)(B), \
3126                                              (__v16sf)(__m512)(C), \
3127                                              (__mmask16)(U), (int)(R))
3128
3129
3130static __inline__ __m512 __DEFAULT_FN_ATTRS512
3131_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3132{
3133  return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3134                                                      (__v16sf) __B,
3135                                                      (__v16sf) __C,
3136                                                      (__mmask16) __U,
3137                                                      _MM_FROUND_CUR_DIRECTION);
3138}
3139
3140#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3141  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3142                                           -(__v8df)(__m512d)(B), \
3143                                           (__v8df)(__m512d)(C), \
3144                                           (__mmask8)(U), (int)(R))
3145
3146
3147static __inline__ __m512d __DEFAULT_FN_ATTRS512
3148_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3149{
3150  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3151                                                    -(__v8df) __B,
3152                                                    (__v8df) __C,
3153                                                    (__mmask8) __U,
3154                                                    _MM_FROUND_CUR_DIRECTION);
3155}
3156
3157#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3158  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3159                                          -(__v16sf)(__m512)(B), \
3160                                          (__v16sf)(__m512)(C), \
3161                                          (__mmask16)(U), (int)(R))
3162
3163
3164static __inline__ __m512 __DEFAULT_FN_ATTRS512
3165_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3166{
3167  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3168                                                   -(__v16sf) __B,
3169                                                   (__v16sf) __C,
3170                                                   (__mmask16) __U,
3171                                                   _MM_FROUND_CUR_DIRECTION);
3172}
3173
3174#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3175  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3176                                           -(__v8df)(__m512d)(B), \
3177                                           -(__v8df)(__m512d)(C), \
3178                                           (__mmask8)(U), (int)(R))
3179
3180
3181#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3182  (__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
3183                                            (__v8df)(__m512d)(B), \
3184                                            (__v8df)(__m512d)(C), \
3185                                            (__mmask8)(U), (int)(R))
3186
3187
3188static __inline__ __m512d __DEFAULT_FN_ATTRS512
3189_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3190{
3191  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3192                                                    -(__v8df) __B,
3193                                                    -(__v8df) __C,
3194                                                    (__mmask8) __U,
3195                                                    _MM_FROUND_CUR_DIRECTION);
3196}
3197
3198static __inline__ __m512d __DEFAULT_FN_ATTRS512
3199_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3200{
3201  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
3202                                                     (__v8df) __B,
3203                                                     (__v8df) __C,
3204                                                     (__mmask8) __U,
3205                                                     _MM_FROUND_CUR_DIRECTION);
3206}
3207
3208#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3209  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3210                                          -(__v16sf)(__m512)(B), \
3211                                          -(__v16sf)(__m512)(C), \
3212                                          (__mmask16)(U), (int)(R))
3213
3214
3215#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3216  (__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
3217                                           (__v16sf)(__m512)(B), \
3218                                           (__v16sf)(__m512)(C), \
3219                                           (__mmask16)(U), (int)(R))
3220
3221
3222static __inline__ __m512 __DEFAULT_FN_ATTRS512
3223_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3224{
3225  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3226                                                   -(__v16sf) __B,
3227                                                   -(__v16sf) __C,
3228                                                   (__mmask16) __U,
3229                                                   _MM_FROUND_CUR_DIRECTION);
3230}
3231
3232static __inline__ __m512 __DEFAULT_FN_ATTRS512
3233_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3234{
3235  return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
3236                                                    (__v16sf) __B,
3237                                                    (__v16sf) __C,
3238                                                    (__mmask16) __U,
3239                                                    _MM_FROUND_CUR_DIRECTION);
3240}
3241
3242
3243
3244/* Vector permutations */
3245
3246static __inline __m512i __DEFAULT_FN_ATTRS512
3247_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3248{
3249  return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3250                                                (__v16si) __B);
3251}
3252
3253static __inline__ __m512i __DEFAULT_FN_ATTRS512
3254_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3255                               __m512i __B)
3256{
3257  return (__m512i)__builtin_ia32_selectd_512(__U,
3258                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3259                              (__v16si)__A);
3260}
3261
3262static __inline__ __m512i __DEFAULT_FN_ATTRS512
3263_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
3264                                __m512i __B)
3265{
3266  return (__m512i)__builtin_ia32_selectd_512(__U,
3267                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3268                              (__v16si)__I);
3269}
3270
3271static __inline__ __m512i __DEFAULT_FN_ATTRS512
3272_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3273                                __m512i __B)
3274{
3275  return (__m512i)__builtin_ia32_selectd_512(__U,
3276                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3277                              (__v16si)_mm512_setzero_si512());
3278}
3279
3280static __inline __m512i __DEFAULT_FN_ATTRS512
3281_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3282{
3283  return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3284                                                (__v8di) __B);
3285}
3286
3287static __inline__ __m512i __DEFAULT_FN_ATTRS512
3288_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3289                               __m512i __B)
3290{
3291  return (__m512i)__builtin_ia32_selectq_512(__U,
3292                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3293                               (__v8di)__A);
3294}
3295
3296static __inline__ __m512i __DEFAULT_FN_ATTRS512
3297_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
3298                                __m512i __B)
3299{
3300  return (__m512i)__builtin_ia32_selectq_512(__U,
3301                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3302                               (__v8di)__I);
3303}
3304
3305static __inline__ __m512i __DEFAULT_FN_ATTRS512
3306_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3307                                __m512i __B)
3308{
3309  return (__m512i)__builtin_ia32_selectq_512(__U,
3310                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3311                               (__v8di)_mm512_setzero_si512());
3312}
3313
3314#define _mm512_alignr_epi64(A, B, I) \
3315  (__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
3316                                    (__v8di)(__m512i)(B), (int)(I))
3317
3318#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
3319  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3320                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3321                                 (__v8di)(__m512i)(W))
3322
3323#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
3324  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3325                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3326                                 (__v8di)_mm512_setzero_si512())
3327
3328#define _mm512_alignr_epi32(A, B, I) \
3329  (__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
3330                                    (__v16si)(__m512i)(B), (int)(I))
3331
3332#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
3333  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3334                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3335                                (__v16si)(__m512i)(W))
3336
3337#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
3338  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3339                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3340                                (__v16si)_mm512_setzero_si512())
3341/* Vector Extract */
3342
3343#define _mm512_extractf64x4_pd(A, I) \
3344  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
3345                                            (__v4df)_mm256_undefined_pd(), \
3346                                            (__mmask8)-1)
3347
3348#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
3349  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
3350                                            (__v4df)(__m256d)(W), \
3351                                            (__mmask8)(U))
3352
3353#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
3354  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
3355                                            (__v4df)_mm256_setzero_pd(), \
3356                                            (__mmask8)(U))
3357
3358#define _mm512_extractf32x4_ps(A, I) \
3359  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
3360                                           (__v4sf)_mm_undefined_ps(), \
3361                                           (__mmask8)-1)
3362
3363#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
3364  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
3365                                           (__v4sf)(__m128)(W), \
3366                                           (__mmask8)(U))
3367
3368#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
3369  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
3370                                           (__v4sf)_mm_setzero_ps(), \
3371                                           (__mmask8)(U))
3372
3373/* Vector Blend */
3374
3375static __inline __m512d __DEFAULT_FN_ATTRS512
3376_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3377{
3378  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3379                 (__v8df) __W,
3380                 (__v8df) __A);
3381}
3382
3383static __inline __m512 __DEFAULT_FN_ATTRS512
3384_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3385{
3386  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3387                (__v16sf) __W,
3388                (__v16sf) __A);
3389}
3390
3391static __inline __m512i __DEFAULT_FN_ATTRS512
3392_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3393{
3394  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3395                (__v8di) __W,
3396                (__v8di) __A);
3397}
3398
3399static __inline __m512i __DEFAULT_FN_ATTRS512
3400_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3401{
3402  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3403                (__v16si) __W,
3404                (__v16si) __A);
3405}
3406
3407/* Compare */
3408
3409#define _mm512_cmp_round_ps_mask(A, B, P, R) \
3410  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
3411                                          (__v16sf)(__m512)(B), (int)(P), \
3412                                          (__mmask16)-1, (int)(R))
3413
3414#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
3415  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
3416                                          (__v16sf)(__m512)(B), (int)(P), \
3417                                          (__mmask16)(U), (int)(R))
3418
3419#define _mm512_cmp_ps_mask(A, B, P) \
3420  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3421#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
3422  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3423
3424#define _mm512_cmpeq_ps_mask(A, B) \
3425    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
3426#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
3427    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)
3428
3429#define _mm512_cmplt_ps_mask(A, B) \
3430    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
3431#define _mm512_mask_cmplt_ps_mask(k, A, B) \
3432    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)
3433
3434#define _mm512_cmple_ps_mask(A, B) \
3435    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
3436#define _mm512_mask_cmple_ps_mask(k, A, B) \
3437    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)
3438
3439#define _mm512_cmpunord_ps_mask(A, B) \
3440    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
3441#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
3442    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)
3443
3444#define _mm512_cmpneq_ps_mask(A, B) \
3445    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
3446#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
3447    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)
3448
3449#define _mm512_cmpnlt_ps_mask(A, B) \
3450    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
3451#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
3452    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)
3453
3454#define _mm512_cmpnle_ps_mask(A, B) \
3455    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
3456#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
3457    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)
3458
3459#define _mm512_cmpord_ps_mask(A, B) \
3460    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
3461#define _mm512_mask_cmpord_ps_mask(k, A, B) \
3462    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
3463
3464#define _mm512_cmp_round_pd_mask(A, B, P, R) \
3465  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
3466                                         (__v8df)(__m512d)(B), (int)(P), \
3467                                         (__mmask8)-1, (int)(R))
3468
3469#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
3470  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
3471                                         (__v8df)(__m512d)(B), (int)(P), \
3472                                         (__mmask8)(U), (int)(R))
3473
3474#define _mm512_cmp_pd_mask(A, B, P) \
3475  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3476#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
3477  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3478
3479#define _mm512_cmpeq_pd_mask(A, B) \
3480    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
3481#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
3482    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)
3483
3484#define _mm512_cmplt_pd_mask(A, B) \
3485    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
3486#define _mm512_mask_cmplt_pd_mask(k, A, B) \
3487    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)
3488
3489#define _mm512_cmple_pd_mask(A, B) \
3490    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
3491#define _mm512_mask_cmple_pd_mask(k, A, B) \
3492    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)
3493
3494#define _mm512_cmpunord_pd_mask(A, B) \
3495    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
3496#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
3497    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)
3498
3499#define _mm512_cmpneq_pd_mask(A, B) \
3500    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
3501#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
3502    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)
3503
3504#define _mm512_cmpnlt_pd_mask(A, B) \
3505    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
3506#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
3507    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)
3508
3509#define _mm512_cmpnle_pd_mask(A, B) \
3510    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
3511#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
3512    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)
3513
3514#define _mm512_cmpord_pd_mask(A, B) \
3515    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
3516#define _mm512_mask_cmpord_pd_mask(k, A, B) \
3517    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3518
3519/* Conversion */
3520
3521#define _mm512_cvtt_roundps_epu32(A, R) \
3522  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3523                                             (__v16si)_mm512_undefined_epi32(), \
3524                                             (__mmask16)-1, (int)(R))
3525
3526#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
3527  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3528                                             (__v16si)(__m512i)(W), \
3529                                             (__mmask16)(U), (int)(R))
3530
3531#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
3532  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3533                                             (__v16si)_mm512_setzero_si512(), \
3534                                             (__mmask16)(U), (int)(R))
3535
3536
3537static __inline __m512i __DEFAULT_FN_ATTRS512
3538_mm512_cvttps_epu32(__m512 __A)
3539{
3540  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3541                  (__v16si)
3542                  _mm512_setzero_si512 (),
3543                  (__mmask16) -1,
3544                  _MM_FROUND_CUR_DIRECTION);
3545}
3546
3547static __inline__ __m512i __DEFAULT_FN_ATTRS512
3548_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3549{
3550  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3551                   (__v16si) __W,
3552                   (__mmask16) __U,
3553                   _MM_FROUND_CUR_DIRECTION);
3554}
3555
3556static __inline__ __m512i __DEFAULT_FN_ATTRS512
3557_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3558{
3559  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3560                   (__v16si) _mm512_setzero_si512 (),
3561                   (__mmask16) __U,
3562                   _MM_FROUND_CUR_DIRECTION);
3563}
3564
3565#define _mm512_cvt_roundepi32_ps(A, R) \
3566  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3567                                          (__v16sf)_mm512_setzero_ps(), \
3568                                          (__mmask16)-1, (int)(R))
3569
3570#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
3571  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3572                                          (__v16sf)(__m512)(W), \
3573                                          (__mmask16)(U), (int)(R))
3574
3575#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
3576  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3577                                          (__v16sf)_mm512_setzero_ps(), \
3578                                          (__mmask16)(U), (int)(R))
3579
3580#define _mm512_cvt_roundepu32_ps(A, R) \
3581  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3582                                           (__v16sf)_mm512_setzero_ps(), \
3583                                           (__mmask16)-1, (int)(R))
3584
3585#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
3586  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3587                                           (__v16sf)(__m512)(W), \
3588                                           (__mmask16)(U), (int)(R))
3589
3590#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
3591  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3592                                           (__v16sf)_mm512_setzero_ps(), \
3593                                           (__mmask16)(U), (int)(R))
3594
3595static __inline__ __m512 __DEFAULT_FN_ATTRS512
3596_mm512_cvtepu32_ps (__m512i __A)
3597{
3598  return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3599}
3600
3601static __inline__ __m512 __DEFAULT_FN_ATTRS512
3602_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3603{
3604  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3605                                             (__v16sf)_mm512_cvtepu32_ps(__A),
3606                                             (__v16sf)__W);
3607}
3608
3609static __inline__ __m512 __DEFAULT_FN_ATTRS512
3610_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3611{
3612  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3613                                             (__v16sf)_mm512_cvtepu32_ps(__A),
3614                                             (__v16sf)_mm512_setzero_ps());
3615}
3616
3617static __inline __m512d __DEFAULT_FN_ATTRS512
3618_mm512_cvtepi32_pd(__m256i __A)
3619{
3620  return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3621}
3622
3623static __inline__ __m512d __DEFAULT_FN_ATTRS512
3624_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3625{
3626  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3627                                              (__v8df)_mm512_cvtepi32_pd(__A),
3628                                              (__v8df)__W);
3629}
3630
3631static __inline__ __m512d __DEFAULT_FN_ATTRS512
3632_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3633{
3634  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3635                                              (__v8df)_mm512_cvtepi32_pd(__A),
3636                                              (__v8df)_mm512_setzero_pd());
3637}
3638
3639static __inline__ __m512d __DEFAULT_FN_ATTRS512
3640_mm512_cvtepi32lo_pd(__m512i __A)
3641{
3642  return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3643}
3644
3645static __inline__ __m512d __DEFAULT_FN_ATTRS512
3646_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3647{
3648  return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3649}
3650
3651static __inline__ __m512 __DEFAULT_FN_ATTRS512
3652_mm512_cvtepi32_ps (__m512i __A)
3653{
3654  return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3655}
3656
3657static __inline__ __m512 __DEFAULT_FN_ATTRS512
3658_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3659{
3660  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3661                                             (__v16sf)_mm512_cvtepi32_ps(__A),
3662                                             (__v16sf)__W);
3663}
3664
3665static __inline__ __m512 __DEFAULT_FN_ATTRS512
3666_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3667{
3668  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3669                                             (__v16sf)_mm512_cvtepi32_ps(__A),
3670                                             (__v16sf)_mm512_setzero_ps());
3671}
3672
3673static __inline __m512d __DEFAULT_FN_ATTRS512
3674_mm512_cvtepu32_pd(__m256i __A)
3675{
3676  return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3677}
3678
3679static __inline__ __m512d __DEFAULT_FN_ATTRS512
3680_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3681{
3682  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3683                                              (__v8df)_mm512_cvtepu32_pd(__A),
3684                                              (__v8df)__W);
3685}
3686
3687static __inline__ __m512d __DEFAULT_FN_ATTRS512
3688_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3689{
3690  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3691                                              (__v8df)_mm512_cvtepu32_pd(__A),
3692                                              (__v8df)_mm512_setzero_pd());
3693}
3694
3695static __inline__ __m512d __DEFAULT_FN_ATTRS512
3696_mm512_cvtepu32lo_pd(__m512i __A)
3697{
3698  return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3699}
3700
3701static __inline__ __m512d __DEFAULT_FN_ATTRS512
3702_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3703{
3704  return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3705}
3706
3707#define _mm512_cvt_roundpd_ps(A, R) \
3708  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3709                                          (__v8sf)_mm256_setzero_ps(), \
3710                                          (__mmask8)-1, (int)(R))
3711
3712#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
3713  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3714                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
3715                                          (int)(R))
3716
3717#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
3718  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3719                                          (__v8sf)_mm256_setzero_ps(), \
3720                                          (__mmask8)(U), (int)(R))
3721
3722static __inline__ __m256 __DEFAULT_FN_ATTRS512
3723_mm512_cvtpd_ps (__m512d __A)
3724{
3725  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3726                (__v8sf) _mm256_undefined_ps (),
3727                (__mmask8) -1,
3728                _MM_FROUND_CUR_DIRECTION);
3729}
3730
3731static __inline__ __m256 __DEFAULT_FN_ATTRS512
3732_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3733{
3734  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3735                (__v8sf) __W,
3736                (__mmask8) __U,
3737                _MM_FROUND_CUR_DIRECTION);
3738}
3739
3740static __inline__ __m256 __DEFAULT_FN_ATTRS512
3741_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
3742{
3743  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3744                (__v8sf) _mm256_setzero_ps (),
3745                (__mmask8) __U,
3746                _MM_FROUND_CUR_DIRECTION);
3747}
3748
3749static __inline__ __m512 __DEFAULT_FN_ATTRS512
3750_mm512_cvtpd_pslo (__m512d __A)
3751{
3752  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3753                (__v8sf) _mm256_setzero_ps (),
3754                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3755}
3756
3757static __inline__ __m512 __DEFAULT_FN_ATTRS512
3758_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3759{
3760  return (__m512) __builtin_shufflevector (
3761                (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
3762                                               __U, __A),
3763                (__v8sf) _mm256_setzero_ps (),
3764                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3765}
3766
3767#define _mm512_cvt_roundps_ph(A, I) \
3768  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3769                                            (__v16hi)_mm256_undefined_si256(), \
3770                                            (__mmask16)-1)
3771
3772#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
3773  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3774                                            (__v16hi)(__m256i)(U), \
3775                                            (__mmask16)(W))
3776
3777#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
3778  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3779                                            (__v16hi)_mm256_setzero_si256(), \
3780                                            (__mmask16)(W))
3781
3782#define _mm512_cvtps_ph       _mm512_cvt_roundps_ph
3783#define _mm512_mask_cvtps_ph  _mm512_mask_cvt_roundps_ph
3784#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
3785
3786#define _mm512_cvt_roundph_ps(A, R) \
3787  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3788                                           (__v16sf)_mm512_undefined_ps(), \
3789                                           (__mmask16)-1, (int)(R))
3790
3791#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
3792  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3793                                           (__v16sf)(__m512)(W), \
3794                                           (__mmask16)(U), (int)(R))
3795
3796#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
3797  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3798                                           (__v16sf)_mm512_setzero_ps(), \
3799                                           (__mmask16)(U), (int)(R))
3800
3801
3802static  __inline __m512 __DEFAULT_FN_ATTRS512
3803_mm512_cvtph_ps(__m256i __A)
3804{
3805  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3806                (__v16sf)
3807                _mm512_setzero_ps (),
3808                (__mmask16) -1,
3809                _MM_FROUND_CUR_DIRECTION);
3810}
3811
3812static __inline__ __m512 __DEFAULT_FN_ATTRS512
3813_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3814{
3815  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3816                 (__v16sf) __W,
3817                 (__mmask16) __U,
3818                 _MM_FROUND_CUR_DIRECTION);
3819}
3820
3821static __inline__ __m512 __DEFAULT_FN_ATTRS512
3822_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
3823{
3824  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3825                 (__v16sf) _mm512_setzero_ps (),
3826                 (__mmask16) __U,
3827                 _MM_FROUND_CUR_DIRECTION);
3828}
3829
3830#define _mm512_cvtt_roundpd_epi32(A, R) \
3831  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3832                                            (__v8si)_mm256_setzero_si256(), \
3833                                            (__mmask8)-1, (int)(R))
3834
3835#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
3836  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3837                                            (__v8si)(__m256i)(W), \
3838                                            (__mmask8)(U), (int)(R))
3839
3840#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
3841  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3842                                            (__v8si)_mm256_setzero_si256(), \
3843                                            (__mmask8)(U), (int)(R))
3844
3845static __inline __m256i __DEFAULT_FN_ATTRS512
3846_mm512_cvttpd_epi32(__m512d __a)
3847{
3848  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3849                                                   (__v8si)_mm256_setzero_si256(),
3850                                                   (__mmask8) -1,
3851                                                    _MM_FROUND_CUR_DIRECTION);
3852}
3853
3854static __inline__ __m256i __DEFAULT_FN_ATTRS512
3855_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3856{
3857  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3858                  (__v8si) __W,
3859                  (__mmask8) __U,
3860                  _MM_FROUND_CUR_DIRECTION);
3861}
3862
3863static __inline__ __m256i __DEFAULT_FN_ATTRS512
3864_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
3865{
3866  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3867                  (__v8si) _mm256_setzero_si256 (),
3868                  (__mmask8) __U,
3869                  _MM_FROUND_CUR_DIRECTION);
3870}
3871
3872#define _mm512_cvtt_roundps_epi32(A, R) \
3873  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3874                                            (__v16si)_mm512_setzero_si512(), \
3875                                            (__mmask16)-1, (int)(R))
3876
3877#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
3878  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3879                                            (__v16si)(__m512i)(W), \
3880                                            (__mmask16)(U), (int)(R))
3881
3882#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
3883  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3884                                            (__v16si)_mm512_setzero_si512(), \
3885                                            (__mmask16)(U), (int)(R))
3886
3887static __inline __m512i __DEFAULT_FN_ATTRS512
3888_mm512_cvttps_epi32(__m512 __a)
3889{
3890  return (__m512i)
3891    __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3892                                     (__v16si) _mm512_setzero_si512 (),
3893                                     (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
3894}
3895
3896static __inline__ __m512i __DEFAULT_FN_ATTRS512
3897_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3898{
3899  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3900                  (__v16si) __W,
3901                  (__mmask16) __U,
3902                  _MM_FROUND_CUR_DIRECTION);
3903}
3904
3905static __inline__ __m512i __DEFAULT_FN_ATTRS512
3906_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
3907{
3908  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3909                  (__v16si) _mm512_setzero_si512 (),
3910                  (__mmask16) __U,
3911                  _MM_FROUND_CUR_DIRECTION);
3912}
3913
3914#define _mm512_cvt_roundps_epi32(A, R) \
3915  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3916                                           (__v16si)_mm512_setzero_si512(), \
3917                                           (__mmask16)-1, (int)(R))
3918
3919#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
3920  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3921                                           (__v16si)(__m512i)(W), \
3922                                           (__mmask16)(U), (int)(R))
3923
3924#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
3925  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3926                                           (__v16si)_mm512_setzero_si512(), \
3927                                           (__mmask16)(U), (int)(R))
3928
3929static __inline__ __m512i __DEFAULT_FN_ATTRS512
3930_mm512_cvtps_epi32 (__m512 __A)
3931{
3932  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3933                 (__v16si) _mm512_undefined_epi32 (),
3934                 (__mmask16) -1,
3935                 _MM_FROUND_CUR_DIRECTION);
3936}
3937
3938static __inline__ __m512i __DEFAULT_FN_ATTRS512
3939_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3940{
3941  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3942                 (__v16si) __W,
3943                 (__mmask16) __U,
3944                 _MM_FROUND_CUR_DIRECTION);
3945}
3946
3947static __inline__ __m512i __DEFAULT_FN_ATTRS512
3948_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
3949{
3950  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3951                 (__v16si)
3952                 _mm512_setzero_si512 (),
3953                 (__mmask16) __U,
3954                 _MM_FROUND_CUR_DIRECTION);
3955}
3956
3957#define _mm512_cvt_roundpd_epi32(A, R) \
3958  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3959                                           (__v8si)_mm256_setzero_si256(), \
3960                                           (__mmask8)-1, (int)(R))
3961
3962#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
3963  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3964                                           (__v8si)(__m256i)(W), \
3965                                           (__mmask8)(U), (int)(R))
3966
3967#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
3968  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3969                                           (__v8si)_mm256_setzero_si256(), \
3970                                           (__mmask8)(U), (int)(R))
3971
3972static __inline__ __m256i __DEFAULT_FN_ATTRS512
3973_mm512_cvtpd_epi32 (__m512d __A)
3974{
3975  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3976                 (__v8si)
3977                 _mm256_undefined_si256 (),
3978                 (__mmask8) -1,
3979                 _MM_FROUND_CUR_DIRECTION);
3980}
3981
3982static __inline__ __m256i __DEFAULT_FN_ATTRS512
3983_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3984{
3985  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3986                 (__v8si) __W,
3987                 (__mmask8) __U,
3988                 _MM_FROUND_CUR_DIRECTION);
3989}
3990
3991static __inline__ __m256i __DEFAULT_FN_ATTRS512
3992_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
3993{
3994  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3995                 (__v8si)
3996                 _mm256_setzero_si256 (),
3997                 (__mmask8) __U,
3998                 _MM_FROUND_CUR_DIRECTION);
3999}
4000
4001#define _mm512_cvt_roundps_epu32(A, R) \
4002  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4003                                            (__v16si)_mm512_setzero_si512(), \
4004                                            (__mmask16)-1, (int)(R))
4005
4006#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
4007  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4008                                            (__v16si)(__m512i)(W), \
4009                                            (__mmask16)(U), (int)(R))
4010
4011#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
4012  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4013                                            (__v16si)_mm512_setzero_si512(), \
4014                                            (__mmask16)(U), (int)(R))
4015
4016static __inline__ __m512i __DEFAULT_FN_ATTRS512
4017_mm512_cvtps_epu32 ( __m512 __A)
4018{
4019  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4020                  (__v16si)\
4021                  _mm512_undefined_epi32 (),
4022                  (__mmask16) -1,\
4023                  _MM_FROUND_CUR_DIRECTION);
4024}
4025
4026static __inline__ __m512i __DEFAULT_FN_ATTRS512
4027_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4028{
4029  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4030                  (__v16si) __W,
4031                  (__mmask16) __U,
4032                  _MM_FROUND_CUR_DIRECTION);
4033}
4034
4035static __inline__ __m512i __DEFAULT_FN_ATTRS512
4036_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4037{
4038  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4039                  (__v16si)
4040                  _mm512_setzero_si512 (),
4041                  (__mmask16) __U ,
4042                  _MM_FROUND_CUR_DIRECTION);
4043}
4044
4045#define _mm512_cvt_roundpd_epu32(A, R) \
4046  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4047                                            (__v8si)_mm256_setzero_si256(), \
4048                                            (__mmask8)-1, (int)(R))
4049
4050#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
4051  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4052                                            (__v8si)(__m256i)(W), \
4053                                            (__mmask8)(U), (int)(R))
4054
4055#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
4056  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4057                                            (__v8si)_mm256_setzero_si256(), \
4058                                            (__mmask8)(U), (int)(R))
4059
4060static __inline__ __m256i __DEFAULT_FN_ATTRS512
4061_mm512_cvtpd_epu32 (__m512d __A)
4062{
4063  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4064                  (__v8si)
4065                  _mm256_undefined_si256 (),
4066                  (__mmask8) -1,
4067                  _MM_FROUND_CUR_DIRECTION);
4068}
4069
4070static __inline__ __m256i __DEFAULT_FN_ATTRS512
4071_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4072{
4073  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4074                  (__v8si) __W,
4075                  (__mmask8) __U,
4076                  _MM_FROUND_CUR_DIRECTION);
4077}
4078
4079static __inline__ __m256i __DEFAULT_FN_ATTRS512
4080_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4081{
4082  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4083                  (__v8si)
4084                  _mm256_setzero_si256 (),
4085                  (__mmask8) __U,
4086                  _MM_FROUND_CUR_DIRECTION);
4087}
4088
4089static __inline__ double __DEFAULT_FN_ATTRS512
4090_mm512_cvtsd_f64(__m512d __a)
4091{
4092  return __a[0];
4093}
4094
4095static __inline__ float __DEFAULT_FN_ATTRS512
4096_mm512_cvtss_f32(__m512 __a)
4097{
4098  return __a[0];
4099}
4100
4101/* Unpack and Interleave */
4102
4103static __inline __m512d __DEFAULT_FN_ATTRS512
4104_mm512_unpackhi_pd(__m512d __a, __m512d __b)
4105{
4106  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4107                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4108}
4109
4110static __inline__ __m512d __DEFAULT_FN_ATTRS512
4111_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4112{
4113  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4114                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
4115                                           (__v8df)__W);
4116}
4117
4118static __inline__ __m512d __DEFAULT_FN_ATTRS512
4119_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4120{
4121  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4122                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
4123                                           (__v8df)_mm512_setzero_pd());
4124}
4125
4126static __inline __m512d __DEFAULT_FN_ATTRS512
4127_mm512_unpacklo_pd(__m512d __a, __m512d __b)
4128{
4129  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4130                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4131}
4132
4133static __inline__ __m512d __DEFAULT_FN_ATTRS512
4134_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4135{
4136  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4137                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
4138                                           (__v8df)__W);
4139}
4140
4141static __inline__ __m512d __DEFAULT_FN_ATTRS512
4142_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4143{
4144  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4145                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
4146                                           (__v8df)_mm512_setzero_pd());
4147}
4148
4149static __inline __m512 __DEFAULT_FN_ATTRS512
4150_mm512_unpackhi_ps(__m512 __a, __m512 __b)
4151{
4152  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4153                                         2,    18,    3,    19,
4154                                         2+4,  18+4,  3+4,  19+4,
4155                                         2+8,  18+8,  3+8,  19+8,
4156                                         2+12, 18+12, 3+12, 19+12);
4157}
4158
4159static __inline__ __m512 __DEFAULT_FN_ATTRS512
4160_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4161{
4162  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4163                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
4164                                          (__v16sf)__W);
4165}
4166
4167static __inline__ __m512 __DEFAULT_FN_ATTRS512
4168_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4169{
4170  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4171                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
4172                                          (__v16sf)_mm512_setzero_ps());
4173}
4174
4175static __inline __m512 __DEFAULT_FN_ATTRS512
4176_mm512_unpacklo_ps(__m512 __a, __m512 __b)
4177{
4178  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4179                                         0,    16,    1,    17,
4180                                         0+4,  16+4,  1+4,  17+4,
4181                                         0+8,  16+8,  1+8,  17+8,
4182                                         0+12, 16+12, 1+12, 17+12);
4183}
4184
4185static __inline__ __m512 __DEFAULT_FN_ATTRS512
4186_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4187{
4188  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4189                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
4190                                          (__v16sf)__W);
4191}
4192
4193static __inline__ __m512 __DEFAULT_FN_ATTRS512
4194_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4195{
4196  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4197                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
4198                                          (__v16sf)_mm512_setzero_ps());
4199}
4200
4201static __inline__ __m512i __DEFAULT_FN_ATTRS512
4202_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4203{
4204  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4205                                          2,    18,    3,    19,
4206                                          2+4,  18+4,  3+4,  19+4,
4207                                          2+8,  18+8,  3+8,  19+8,
4208                                          2+12, 18+12, 3+12, 19+12);
4209}
4210
4211static __inline__ __m512i __DEFAULT_FN_ATTRS512
4212_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4213{
4214  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4215                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
4216                                       (__v16si)__W);
4217}
4218
4219static __inline__ __m512i __DEFAULT_FN_ATTRS512
4220_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4221{
4222  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4223                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
4224                                       (__v16si)_mm512_setzero_si512());
4225}
4226
4227static __inline__ __m512i __DEFAULT_FN_ATTRS512
4228_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4229{
4230  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4231                                          0,    16,    1,    17,
4232                                          0+4,  16+4,  1+4,  17+4,
4233                                          0+8,  16+8,  1+8,  17+8,
4234                                          0+12, 16+12, 1+12, 17+12);
4235}
4236
4237static __inline__ __m512i __DEFAULT_FN_ATTRS512
4238_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4239{
4240  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4241                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
4242                                       (__v16si)__W);
4243}
4244
4245static __inline__ __m512i __DEFAULT_FN_ATTRS512
4246_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4247{
4248  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4249                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
4250                                       (__v16si)_mm512_setzero_si512());
4251}
4252
4253static __inline__ __m512i __DEFAULT_FN_ATTRS512
4254_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4255{
4256  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4257                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4258}
4259
4260static __inline__ __m512i __DEFAULT_FN_ATTRS512
4261_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4262{
4263  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4264                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
4265                                        (__v8di)__W);
4266}
4267
4268static __inline__ __m512i __DEFAULT_FN_ATTRS512
4269_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4270{
4271  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4272                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
4273                                        (__v8di)_mm512_setzero_si512());
4274}
4275
4276static __inline__ __m512i __DEFAULT_FN_ATTRS512
4277_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4278{
4279  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4280                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4281}
4282
4283static __inline__ __m512i __DEFAULT_FN_ATTRS512
4284_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4285{
4286  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4287                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
4288                                        (__v8di)__W);
4289}
4290
4291static __inline__ __m512i __DEFAULT_FN_ATTRS512
4292_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4293{
4294  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4295                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
4296                                        (__v8di)_mm512_setzero_si512());
4297}
4298
4299
4300/* SIMD load ops */
4301
4302static __inline __m512i __DEFAULT_FN_ATTRS512
4303_mm512_loadu_si512 (void const *__P)
4304{
4305  struct __loadu_si512 {
4306    __m512i_u __v;
4307  } __attribute__((__packed__, __may_alias__));
4308  return ((const struct __loadu_si512*)__P)->__v;
4309}
4310
4311static __inline __m512i __DEFAULT_FN_ATTRS512
4312_mm512_loadu_epi32 (void const *__P)
4313{
4314  struct __loadu_epi32 {
4315    __m512i_u __v;
4316  } __attribute__((__packed__, __may_alias__));
4317  return ((const struct __loadu_epi32*)__P)->__v;
4318}
4319
4320static __inline __m512i __DEFAULT_FN_ATTRS512
4321_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4322{
4323  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4324                  (__v16si) __W,
4325                  (__mmask16) __U);
4326}
4327
4328
4329static __inline __m512i __DEFAULT_FN_ATTRS512
4330_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
4331{
4332  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4333                                                     (__v16si)
4334                                                     _mm512_setzero_si512 (),
4335                                                     (__mmask16) __U);
4336}
4337
4338static __inline __m512i __DEFAULT_FN_ATTRS512
4339_mm512_loadu_epi64 (void const *__P)
4340{
4341  struct __loadu_epi64 {
4342    __m512i_u __v;
4343  } __attribute__((__packed__, __may_alias__));
4344  return ((const struct __loadu_epi64*)__P)->__v;
4345}
4346
4347static __inline __m512i __DEFAULT_FN_ATTRS512
4348_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4349{
4350  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4351                  (__v8di) __W,
4352                  (__mmask8) __U);
4353}
4354
4355static __inline __m512i __DEFAULT_FN_ATTRS512
4356_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
4357{
4358  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4359                                                     (__v8di)
4360                                                     _mm512_setzero_si512 (),
4361                                                     (__mmask8) __U);
4362}
4363
4364static __inline __m512 __DEFAULT_FN_ATTRS512
4365_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4366{
4367  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4368                   (__v16sf) __W,
4369                   (__mmask16) __U);
4370}
4371
4372static __inline __m512 __DEFAULT_FN_ATTRS512
4373_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
4374{
4375  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4376                                                  (__v16sf)
4377                                                  _mm512_setzero_ps (),
4378                                                  (__mmask16) __U);
4379}
4380
4381static __inline __m512d __DEFAULT_FN_ATTRS512
4382_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4383{
4384  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4385                (__v8df) __W,
4386                (__mmask8) __U);
4387}
4388
4389static __inline __m512d __DEFAULT_FN_ATTRS512
4390_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
4391{
4392  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4393                                                   (__v8df)
4394                                                   _mm512_setzero_pd (),
4395                                                   (__mmask8) __U);
4396}
4397
4398static __inline __m512d __DEFAULT_FN_ATTRS512
4399_mm512_loadu_pd(void const *__p)
4400{
4401  struct __loadu_pd {
4402    __m512d_u __v;
4403  } __attribute__((__packed__, __may_alias__));
4404  return ((const struct __loadu_pd*)__p)->__v;
4405}
4406
4407static __inline __m512 __DEFAULT_FN_ATTRS512
4408_mm512_loadu_ps(void const *__p)
4409{
4410  struct __loadu_ps {
4411    __m512_u __v;
4412  } __attribute__((__packed__, __may_alias__));
4413  return ((const struct __loadu_ps*)__p)->__v;
4414}
4415
4416static __inline __m512 __DEFAULT_FN_ATTRS512
4417_mm512_load_ps(void const *__p)
4418{
4419  return *(const __m512*)__p;
4420}
4421
4422static __inline __m512 __DEFAULT_FN_ATTRS512
4423_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4424{
4425  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4426                   (__v16sf) __W,
4427                   (__mmask16) __U);
4428}
4429
4430static __inline __m512 __DEFAULT_FN_ATTRS512
4431_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4432{
4433  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4434                                                  (__v16sf)
4435                                                  _mm512_setzero_ps (),
4436                                                  (__mmask16) __U);
4437}
4438
4439static __inline __m512d __DEFAULT_FN_ATTRS512
4440_mm512_load_pd(void const *__p)
4441{
4442  return *(const __m512d*)__p;
4443}
4444
4445static __inline __m512d __DEFAULT_FN_ATTRS512
4446_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4447{
4448  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4449                          (__v8df) __W,
4450                          (__mmask8) __U);
4451}
4452
4453static __inline __m512d __DEFAULT_FN_ATTRS512
4454_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4455{
4456  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4457                                                   (__v8df)
4458                                                   _mm512_setzero_pd (),
4459                                                   (__mmask8) __U);
4460}
4461
4462static __inline __m512i __DEFAULT_FN_ATTRS512
4463_mm512_load_si512 (void const *__P)
4464{
4465  return *(const __m512i *) __P;
4466}
4467
4468static __inline __m512i __DEFAULT_FN_ATTRS512
4469_mm512_load_epi32 (void const *__P)
4470{
4471  return *(const __m512i *) __P;
4472}
4473
4474static __inline __m512i __DEFAULT_FN_ATTRS512
4475_mm512_load_epi64 (void const *__P)
4476{
4477  return *(const __m512i *) __P;
4478}
4479
4480/* SIMD store ops */
4481
4482static __inline void __DEFAULT_FN_ATTRS512
4483_mm512_storeu_epi64 (void *__P, __m512i __A)
4484{
4485  struct __storeu_epi64 {
4486    __m512i_u __v;
4487  } __attribute__((__packed__, __may_alias__));
4488  ((struct __storeu_epi64*)__P)->__v = __A;
4489}
4490
4491static __inline void __DEFAULT_FN_ATTRS512
4492_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4493{
4494  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4495                                     (__mmask8) __U);
4496}
4497
4498static __inline void __DEFAULT_FN_ATTRS512
4499_mm512_storeu_si512 (void *__P, __m512i __A)
4500{
4501  struct __storeu_si512 {
4502    __m512i_u __v;
4503  } __attribute__((__packed__, __may_alias__));
4504  ((struct __storeu_si512*)__P)->__v = __A;
4505}
4506
4507static __inline void __DEFAULT_FN_ATTRS512
4508_mm512_storeu_epi32 (void *__P, __m512i __A)
4509{
4510  struct __storeu_epi32 {
4511    __m512i_u __v;
4512  } __attribute__((__packed__, __may_alias__));
4513  ((struct __storeu_epi32*)__P)->__v = __A;
4514}
4515
4516static __inline void __DEFAULT_FN_ATTRS512
4517_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
4518{
4519  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4520                                     (__mmask16) __U);
4521}
4522
4523static __inline void __DEFAULT_FN_ATTRS512
4524_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4525{
4526  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4527}
4528
4529static __inline void __DEFAULT_FN_ATTRS512
4530_mm512_storeu_pd(void *__P, __m512d __A)
4531{
4532  struct __storeu_pd {
4533    __m512d_u __v;
4534  } __attribute__((__packed__, __may_alias__));
4535  ((struct __storeu_pd*)__P)->__v = __A;
4536}
4537
4538static __inline void __DEFAULT_FN_ATTRS512
4539_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4540{
4541  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4542                                   (__mmask16) __U);
4543}
4544
4545static __inline void __DEFAULT_FN_ATTRS512
4546_mm512_storeu_ps(void *__P, __m512 __A)
4547{
4548  struct __storeu_ps {
4549    __m512_u __v;
4550  } __attribute__((__packed__, __may_alias__));
4551  ((struct __storeu_ps*)__P)->__v = __A;
4552}
4553
4554static __inline void __DEFAULT_FN_ATTRS512
4555_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4556{
4557  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4558}
4559
4560static __inline void __DEFAULT_FN_ATTRS512
4561_mm512_store_pd(void *__P, __m512d __A)
4562{
4563  *(__m512d*)__P = __A;
4564}
4565
4566static __inline void __DEFAULT_FN_ATTRS512
4567_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4568{
4569  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4570                                   (__mmask16) __U);
4571}
4572
4573static __inline void __DEFAULT_FN_ATTRS512
4574_mm512_store_ps(void *__P, __m512 __A)
4575{
4576  *(__m512*)__P = __A;
4577}
4578
4579static __inline void __DEFAULT_FN_ATTRS512
4580_mm512_store_si512 (void *__P, __m512i __A)
4581{
4582  *(__m512i *) __P = __A;
4583}
4584
4585static __inline void __DEFAULT_FN_ATTRS512
4586_mm512_store_epi32 (void *__P, __m512i __A)
4587{
4588  *(__m512i *) __P = __A;
4589}
4590
4591static __inline void __DEFAULT_FN_ATTRS512
4592_mm512_store_epi64 (void *__P, __m512i __A)
4593{
4594  *(__m512i *) __P = __A;
4595}
4596
4597/* Mask ops */
4598
4599static __inline __mmask16 __DEFAULT_FN_ATTRS
4600_mm512_knot(__mmask16 __M)
4601{
4602  return __builtin_ia32_knothi(__M);
4603}
4604
4605/* Integer compare */
4606
4607#define _mm512_cmpeq_epi32_mask(A, B) \
4608    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
4609#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
4610    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
4611#define _mm512_cmpge_epi32_mask(A, B) \
4612    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
4613#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
4614    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
4615#define _mm512_cmpgt_epi32_mask(A, B) \
4616    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
4617#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
4618    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
4619#define _mm512_cmple_epi32_mask(A, B) \
4620    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
4621#define _mm512_mask_cmple_epi32_mask(k, A, B) \
4622    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
4623#define _mm512_cmplt_epi32_mask(A, B) \
4624    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
4625#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
4626    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
4627#define _mm512_cmpneq_epi32_mask(A, B) \
4628    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
4629#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
4630    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
4631
4632#define _mm512_cmpeq_epu32_mask(A, B) \
4633    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
4634#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
4635    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
4636#define _mm512_cmpge_epu32_mask(A, B) \
4637    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
4638#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
4639    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
4640#define _mm512_cmpgt_epu32_mask(A, B) \
4641    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
4642#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
4643    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
4644#define _mm512_cmple_epu32_mask(A, B) \
4645    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
4646#define _mm512_mask_cmple_epu32_mask(k, A, B) \
4647    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
4648#define _mm512_cmplt_epu32_mask(A, B) \
4649    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
4650#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
4651    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
4652#define _mm512_cmpneq_epu32_mask(A, B) \
4653    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
4654#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
4655    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
4656
4657#define _mm512_cmpeq_epi64_mask(A, B) \
4658    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
4659#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
4660    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
4661#define _mm512_cmpge_epi64_mask(A, B) \
4662    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
4663#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
4664    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
4665#define _mm512_cmpgt_epi64_mask(A, B) \
4666    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
4667#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
4668    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
4669#define _mm512_cmple_epi64_mask(A, B) \
4670    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
4671#define _mm512_mask_cmple_epi64_mask(k, A, B) \
4672    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
4673#define _mm512_cmplt_epi64_mask(A, B) \
4674    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
4675#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
4676    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
4677#define _mm512_cmpneq_epi64_mask(A, B) \
4678    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
4679#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
4680    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
4681
4682#define _mm512_cmpeq_epu64_mask(A, B) \
4683    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
4684#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
4685    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
4686#define _mm512_cmpge_epu64_mask(A, B) \
4687    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
4688#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
4689    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
4690#define _mm512_cmpgt_epu64_mask(A, B) \
4691    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
4692#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
4693    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
4694#define _mm512_cmple_epu64_mask(A, B) \
4695    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
4696#define _mm512_mask_cmple_epu64_mask(k, A, B) \
4697    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
4698#define _mm512_cmplt_epu64_mask(A, B) \
4699    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
4700#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
4701    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
4702#define _mm512_cmpneq_epu64_mask(A, B) \
4703    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
4704#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
4705    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4706
4707static __inline__ __m512i __DEFAULT_FN_ATTRS512
4708_mm512_cvtepi8_epi32(__m128i __A)
4709{
4710  /* This function always performs a signed extension, but __v16qi is a char
4711     which may be signed or unsigned, so use __v16qs. */
4712  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4713}
4714
4715static __inline__ __m512i __DEFAULT_FN_ATTRS512
4716_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4717{
4718  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4719                                             (__v16si)_mm512_cvtepi8_epi32(__A),
4720                                             (__v16si)__W);
4721}
4722
4723static __inline__ __m512i __DEFAULT_FN_ATTRS512
4724_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
4725{
4726  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4727                                             (__v16si)_mm512_cvtepi8_epi32(__A),
4728                                             (__v16si)_mm512_setzero_si512());
4729}
4730
4731static __inline__ __m512i __DEFAULT_FN_ATTRS512
4732_mm512_cvtepi8_epi64(__m128i __A)
4733{
4734  /* This function always performs a signed extension, but __v16qi is a char
4735     which may be signed or unsigned, so use __v16qs. */
4736  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4737}
4738
4739static __inline__ __m512i __DEFAULT_FN_ATTRS512
4740_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4741{
4742  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4743                                             (__v8di)_mm512_cvtepi8_epi64(__A),
4744                                             (__v8di)__W);
4745}
4746
4747static __inline__ __m512i __DEFAULT_FN_ATTRS512
4748_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4749{
4750  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4751                                             (__v8di)_mm512_cvtepi8_epi64(__A),
4752                                             (__v8di)_mm512_setzero_si512 ());
4753}
4754
4755static __inline__ __m512i __DEFAULT_FN_ATTRS512
4756_mm512_cvtepi32_epi64(__m256i __X)
4757{
4758  return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4759}
4760
4761static __inline__ __m512i __DEFAULT_FN_ATTRS512
4762_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4763{
4764  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4765                                             (__v8di)_mm512_cvtepi32_epi64(__X),
4766                                             (__v8di)__W);
4767}
4768
4769static __inline__ __m512i __DEFAULT_FN_ATTRS512
4770_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
4771{
4772  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4773                                             (__v8di)_mm512_cvtepi32_epi64(__X),
4774                                             (__v8di)_mm512_setzero_si512());
4775}
4776
4777static __inline__ __m512i __DEFAULT_FN_ATTRS512
4778_mm512_cvtepi16_epi32(__m256i __A)
4779{
4780  return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4781}
4782
4783static __inline__ __m512i __DEFAULT_FN_ATTRS512
4784_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4785{
4786  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4787                                            (__v16si)_mm512_cvtepi16_epi32(__A),
4788                                            (__v16si)__W);
4789}
4790
4791static __inline__ __m512i __DEFAULT_FN_ATTRS512
4792_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
4793{
4794  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4795                                            (__v16si)_mm512_cvtepi16_epi32(__A),
4796                                            (__v16si)_mm512_setzero_si512 ());
4797}
4798
4799static __inline__ __m512i __DEFAULT_FN_ATTRS512
4800_mm512_cvtepi16_epi64(__m128i __A)
4801{
4802  return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4803}
4804
4805static __inline__ __m512i __DEFAULT_FN_ATTRS512
4806_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4807{
4808  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4809                                             (__v8di)_mm512_cvtepi16_epi64(__A),
4810                                             (__v8di)__W);
4811}
4812
4813static __inline__ __m512i __DEFAULT_FN_ATTRS512
4814_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4815{
4816  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4817                                             (__v8di)_mm512_cvtepi16_epi64(__A),
4818                                             (__v8di)_mm512_setzero_si512());
4819}
4820
4821static __inline__ __m512i __DEFAULT_FN_ATTRS512
4822_mm512_cvtepu8_epi32(__m128i __A)
4823{
4824  return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4825}
4826
4827static __inline__ __m512i __DEFAULT_FN_ATTRS512
4828_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4829{
4830  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4831                                             (__v16si)_mm512_cvtepu8_epi32(__A),
4832                                             (__v16si)__W);
4833}
4834
4835static __inline__ __m512i __DEFAULT_FN_ATTRS512
4836_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
4837{
4838  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4839                                             (__v16si)_mm512_cvtepu8_epi32(__A),
4840                                             (__v16si)_mm512_setzero_si512());
4841}
4842
4843static __inline__ __m512i __DEFAULT_FN_ATTRS512
4844_mm512_cvtepu8_epi64(__m128i __A)
4845{
4846  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4847}
4848
4849static __inline__ __m512i __DEFAULT_FN_ATTRS512
4850_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4851{
4852  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4853                                             (__v8di)_mm512_cvtepu8_epi64(__A),
4854                                             (__v8di)__W);
4855}
4856
4857static __inline__ __m512i __DEFAULT_FN_ATTRS512
4858_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
4859{
4860  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4861                                             (__v8di)_mm512_cvtepu8_epi64(__A),
4862                                             (__v8di)_mm512_setzero_si512());
4863}
4864
4865static __inline__ __m512i __DEFAULT_FN_ATTRS512
4866_mm512_cvtepu32_epi64(__m256i __X)
4867{
4868  return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4869}
4870
4871static __inline__ __m512i __DEFAULT_FN_ATTRS512
4872_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4873{
4874  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4875                                             (__v8di)_mm512_cvtepu32_epi64(__X),
4876                                             (__v8di)__W);
4877}
4878
4879static __inline__ __m512i __DEFAULT_FN_ATTRS512
4880_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
4881{
4882  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4883                                             (__v8di)_mm512_cvtepu32_epi64(__X),
4884                                             (__v8di)_mm512_setzero_si512());
4885}
4886
4887static __inline__ __m512i __DEFAULT_FN_ATTRS512
4888_mm512_cvtepu16_epi32(__m256i __A)
4889{
4890  return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4891}
4892
4893static __inline__ __m512i __DEFAULT_FN_ATTRS512
4894_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4895{
4896  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4897                                            (__v16si)_mm512_cvtepu16_epi32(__A),
4898                                            (__v16si)__W);
4899}
4900
4901static __inline__ __m512i __DEFAULT_FN_ATTRS512
4902_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
4903{
4904  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4905                                            (__v16si)_mm512_cvtepu16_epi32(__A),
4906                                            (__v16si)_mm512_setzero_si512());
4907}
4908
4909static __inline__ __m512i __DEFAULT_FN_ATTRS512
4910_mm512_cvtepu16_epi64(__m128i __A)
4911{
4912  return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4913}
4914
4915static __inline__ __m512i __DEFAULT_FN_ATTRS512
4916_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4917{
4918  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4919                                             (__v8di)_mm512_cvtepu16_epi64(__A),
4920                                             (__v8di)__W);
4921}
4922
4923static __inline__ __m512i __DEFAULT_FN_ATTRS512
4924_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4925{
4926  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4927                                             (__v8di)_mm512_cvtepu16_epi64(__A),
4928                                             (__v8di)_mm512_setzero_si512());
4929}
4930
4931static __inline__ __m512i __DEFAULT_FN_ATTRS512
4932_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4933{
4934  return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
4935}
4936
4937static __inline__ __m512i __DEFAULT_FN_ATTRS512
4938_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4939{
4940  return (__m512i)__builtin_ia32_selectd_512(__U,
4941                                           (__v16si)_mm512_rorv_epi32(__A, __B),
4942                                           (__v16si)__W);
4943}
4944
4945static __inline__ __m512i __DEFAULT_FN_ATTRS512
4946_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4947{
4948  return (__m512i)__builtin_ia32_selectd_512(__U,
4949                                           (__v16si)_mm512_rorv_epi32(__A, __B),
4950                                           (__v16si)_mm512_setzero_si512());
4951}
4952
4953static __inline__ __m512i __DEFAULT_FN_ATTRS512
4954_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4955{
4956  return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
4957}
4958
4959static __inline__ __m512i __DEFAULT_FN_ATTRS512
4960_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4961{
4962  return (__m512i)__builtin_ia32_selectq_512(__U,
4963                                            (__v8di)_mm512_rorv_epi64(__A, __B),
4964                                            (__v8di)__W);
4965}
4966
4967static __inline__ __m512i __DEFAULT_FN_ATTRS512
4968_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4969{
4970  return (__m512i)__builtin_ia32_selectq_512(__U,
4971                                            (__v8di)_mm512_rorv_epi64(__A, __B),
4972                                            (__v8di)_mm512_setzero_si512());
4973}
4974
4975
4976
4977#define _mm512_cmp_epi32_mask(a, b, p) \
4978  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
4979                                         (__v16si)(__m512i)(b), (int)(p), \
4980                                         (__mmask16)-1)
4981
4982#define _mm512_cmp_epu32_mask(a, b, p) \
4983  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
4984                                          (__v16si)(__m512i)(b), (int)(p), \
4985                                          (__mmask16)-1)
4986
4987#define _mm512_cmp_epi64_mask(a, b, p) \
4988  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
4989                                        (__v8di)(__m512i)(b), (int)(p), \
4990                                        (__mmask8)-1)
4991
4992#define _mm512_cmp_epu64_mask(a, b, p) \
4993  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
4994                                         (__v8di)(__m512i)(b), (int)(p), \
4995                                         (__mmask8)-1)
4996
4997#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
4998  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
4999                                         (__v16si)(__m512i)(b), (int)(p), \
5000                                         (__mmask16)(m))
5001
5002#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
5003  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
5004                                          (__v16si)(__m512i)(b), (int)(p), \
5005                                          (__mmask16)(m))
5006
5007#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
5008  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
5009                                        (__v8di)(__m512i)(b), (int)(p), \
5010                                        (__mmask8)(m))
5011
5012#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
5013  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
5014                                         (__v8di)(__m512i)(b), (int)(p), \
5015                                         (__mmask8)(m))
5016
5017#define _mm512_rol_epi32(a, b) \
5018  (__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b))
5019
5020#define _mm512_mask_rol_epi32(W, U, a, b) \
5021  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
5022                                      (__v16si)_mm512_rol_epi32((a), (b)), \
5023                                      (__v16si)(__m512i)(W))
5024
5025#define _mm512_maskz_rol_epi32(U, a, b) \
5026  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
5027                                      (__v16si)_mm512_rol_epi32((a), (b)), \
5028                                      (__v16si)_mm512_setzero_si512())
5029
5030#define _mm512_rol_epi64(a, b) \
5031  (__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b))
5032
5033#define _mm512_mask_rol_epi64(W, U, a, b) \
5034  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
5035                                      (__v8di)_mm512_rol_epi64((a), (b)), \
5036                                      (__v8di)(__m512i)(W))
5037
5038#define _mm512_maskz_rol_epi64(U, a, b) \
5039  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
5040                                      (__v8di)_mm512_rol_epi64((a), (b)), \
5041                                      (__v8di)_mm512_setzero_si512())
5042
5043static __inline__ __m512i __DEFAULT_FN_ATTRS512
5044_mm512_rolv_epi32 (__m512i __A, __m512i __B)
5045{
5046  return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
5047}
5048
5049static __inline__ __m512i __DEFAULT_FN_ATTRS512
5050_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5051{
5052  return (__m512i)__builtin_ia32_selectd_512(__U,
5053                                           (__v16si)_mm512_rolv_epi32(__A, __B),
5054                                           (__v16si)__W);
5055}
5056
5057static __inline__ __m512i __DEFAULT_FN_ATTRS512
5058_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5059{
5060  return (__m512i)__builtin_ia32_selectd_512(__U,
5061                                           (__v16si)_mm512_rolv_epi32(__A, __B),
5062                                           (__v16si)_mm512_setzero_si512());
5063}
5064
5065static __inline__ __m512i __DEFAULT_FN_ATTRS512
5066_mm512_rolv_epi64 (__m512i __A, __m512i __B)
5067{
5068  return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
5069}
5070
5071static __inline__ __m512i __DEFAULT_FN_ATTRS512
5072_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5073{
5074  return (__m512i)__builtin_ia32_selectq_512(__U,
5075                                            (__v8di)_mm512_rolv_epi64(__A, __B),
5076                                            (__v8di)__W);
5077}
5078
5079static __inline__ __m512i __DEFAULT_FN_ATTRS512
5080_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5081{
5082  return (__m512i)__builtin_ia32_selectq_512(__U,
5083                                            (__v8di)_mm512_rolv_epi64(__A, __B),
5084                                            (__v8di)_mm512_setzero_si512());
5085}
5086
5087#define _mm512_ror_epi32(A, B) \
5088  (__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B))
5089
5090#define _mm512_mask_ror_epi32(W, U, A, B) \
5091  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
5092                                      (__v16si)_mm512_ror_epi32((A), (B)), \
5093                                      (__v16si)(__m512i)(W))
5094
5095#define _mm512_maskz_ror_epi32(U, A, B) \
5096  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
5097                                      (__v16si)_mm512_ror_epi32((A), (B)), \
5098                                      (__v16si)_mm512_setzero_si512())
5099
5100#define _mm512_ror_epi64(A, B) \
5101  (__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B))
5102
5103#define _mm512_mask_ror_epi64(W, U, A, B) \
5104  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
5105                                      (__v8di)_mm512_ror_epi64((A), (B)), \
5106                                      (__v8di)(__m512i)(W))
5107
5108#define _mm512_maskz_ror_epi64(U, A, B) \
5109  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
5110                                      (__v8di)_mm512_ror_epi64((A), (B)), \
5111                                      (__v8di)_mm512_setzero_si512())
5112
5113static __inline__ __m512i __DEFAULT_FN_ATTRS512
5114_mm512_slli_epi32(__m512i __A, int __B)
5115{
5116  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5117}
5118
5119static __inline__ __m512i __DEFAULT_FN_ATTRS512
5120_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5121{
5122  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5123                                         (__v16si)_mm512_slli_epi32(__A, __B),
5124                                         (__v16si)__W);
5125}
5126
5127static __inline__ __m512i __DEFAULT_FN_ATTRS512
5128_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
5129  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5130                                         (__v16si)_mm512_slli_epi32(__A, __B),
5131                                         (__v16si)_mm512_setzero_si512());
5132}
5133
5134static __inline__ __m512i __DEFAULT_FN_ATTRS512
5135_mm512_slli_epi64(__m512i __A, int __B)
5136{
5137  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5138}
5139
5140static __inline__ __m512i __DEFAULT_FN_ATTRS512
5141_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5142{
5143  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5144                                          (__v8di)_mm512_slli_epi64(__A, __B),
5145                                          (__v8di)__W);
5146}
5147
5148static __inline__ __m512i __DEFAULT_FN_ATTRS512
5149_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
5150{
5151  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5152                                          (__v8di)_mm512_slli_epi64(__A, __B),
5153                                          (__v8di)_mm512_setzero_si512());
5154}
5155
5156static __inline__ __m512i __DEFAULT_FN_ATTRS512
5157_mm512_srli_epi32(__m512i __A, int __B)
5158{
5159  return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5160}
5161
5162static __inline__ __m512i __DEFAULT_FN_ATTRS512
5163_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5164{
5165  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5166                                         (__v16si)_mm512_srli_epi32(__A, __B),
5167                                         (__v16si)__W);
5168}
5169
5170static __inline__ __m512i __DEFAULT_FN_ATTRS512
5171_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
5172  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5173                                         (__v16si)_mm512_srli_epi32(__A, __B),
5174                                         (__v16si)_mm512_setzero_si512());
5175}
5176
5177static __inline__ __m512i __DEFAULT_FN_ATTRS512
5178_mm512_srli_epi64(__m512i __A, int __B)
5179{
5180  return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5181}
5182
5183static __inline__ __m512i __DEFAULT_FN_ATTRS512
5184_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5185{
5186  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5187                                          (__v8di)_mm512_srli_epi64(__A, __B),
5188                                          (__v8di)__W);
5189}
5190
5191static __inline__ __m512i __DEFAULT_FN_ATTRS512
5192_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
5193{
5194  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5195                                          (__v8di)_mm512_srli_epi64(__A, __B),
5196                                          (__v8di)_mm512_setzero_si512());
5197}
5198
5199static __inline__ __m512i __DEFAULT_FN_ATTRS512
5200_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5201{
5202  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5203              (__v16si) __W,
5204              (__mmask16) __U);
5205}
5206
5207static __inline__ __m512i __DEFAULT_FN_ATTRS512
5208_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
5209{
5210  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5211              (__v16si)
5212              _mm512_setzero_si512 (),
5213              (__mmask16) __U);
5214}
5215
5216static __inline__ void __DEFAULT_FN_ATTRS512
5217_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5218{
5219  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5220          (__mmask16) __U);
5221}
5222
5223static __inline__ __m512i __DEFAULT_FN_ATTRS512
5224_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5225{
5226  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5227                 (__v16si) __A,
5228                 (__v16si) __W);
5229}
5230
5231static __inline__ __m512i __DEFAULT_FN_ATTRS512
5232_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
5233{
5234  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5235                 (__v16si) __A,
5236                 (__v16si) _mm512_setzero_si512 ());
5237}
5238
5239static __inline__ __m512i __DEFAULT_FN_ATTRS512
5240_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5241{
5242  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5243                 (__v8di) __A,
5244                 (__v8di) __W);
5245}
5246
5247static __inline__ __m512i __DEFAULT_FN_ATTRS512
5248_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
5249{
5250  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5251                 (__v8di) __A,
5252                 (__v8di) _mm512_setzero_si512 ());
5253}
5254
5255static __inline__ __m512i __DEFAULT_FN_ATTRS512
5256_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5257{
5258  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5259              (__v8di) __W,
5260              (__mmask8) __U);
5261}
5262
5263static __inline__ __m512i __DEFAULT_FN_ATTRS512
5264_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5265{
5266  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5267              (__v8di)
5268              _mm512_setzero_si512 (),
5269              (__mmask8) __U);
5270}
5271
5272static __inline__ void __DEFAULT_FN_ATTRS512
5273_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5274{
5275  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5276          (__mmask8) __U);
5277}
5278
5279static __inline__ __m512d __DEFAULT_FN_ATTRS512
5280_mm512_movedup_pd (__m512d __A)
5281{
5282  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5283                                          0, 0, 2, 2, 4, 4, 6, 6);
5284}
5285
5286static __inline__ __m512d __DEFAULT_FN_ATTRS512
5287_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5288{
5289  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5290                                              (__v8df)_mm512_movedup_pd(__A),
5291                                              (__v8df)__W);
5292}
5293
5294static __inline__ __m512d __DEFAULT_FN_ATTRS512
5295_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
5296{
5297  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5298                                              (__v8df)_mm512_movedup_pd(__A),
5299                                              (__v8df)_mm512_setzero_pd());
5300}
5301
5302#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
5303  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5304                                             (__v8df)(__m512d)(B), \
5305                                             (__v8di)(__m512i)(C), (int)(imm), \
5306                                             (__mmask8)-1, (int)(R))
5307
5308#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
5309  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5310                                             (__v8df)(__m512d)(B), \
5311                                             (__v8di)(__m512i)(C), (int)(imm), \
5312                                             (__mmask8)(U), (int)(R))
5313
5314#define _mm512_fixupimm_pd(A, B, C, imm) \
5315  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5316                                             (__v8df)(__m512d)(B), \
5317                                             (__v8di)(__m512i)(C), (int)(imm), \
5318                                             (__mmask8)-1, \
5319                                             _MM_FROUND_CUR_DIRECTION)
5320
5321#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
5322  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5323                                             (__v8df)(__m512d)(B), \
5324                                             (__v8di)(__m512i)(C), (int)(imm), \
5325                                             (__mmask8)(U), \
5326                                             _MM_FROUND_CUR_DIRECTION)
5327
5328#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
5329  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
5330                                              (__v8df)(__m512d)(B), \
5331                                              (__v8di)(__m512i)(C), \
5332                                              (int)(imm), (__mmask8)(U), \
5333                                              (int)(R))
5334
5335#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
5336  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
5337                                              (__v8df)(__m512d)(B), \
5338                                              (__v8di)(__m512i)(C), \
5339                                              (int)(imm), (__mmask8)(U), \
5340                                              _MM_FROUND_CUR_DIRECTION)
5341
5342#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
5343  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5344                                            (__v16sf)(__m512)(B), \
5345                                            (__v16si)(__m512i)(C), (int)(imm), \
5346                                            (__mmask16)-1, (int)(R))
5347
5348#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
5349  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5350                                            (__v16sf)(__m512)(B), \
5351                                            (__v16si)(__m512i)(C), (int)(imm), \
5352                                            (__mmask16)(U), (int)(R))
5353
5354#define _mm512_fixupimm_ps(A, B, C, imm) \
5355  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5356                                            (__v16sf)(__m512)(B), \
5357                                            (__v16si)(__m512i)(C), (int)(imm), \
5358                                            (__mmask16)-1, \
5359                                            _MM_FROUND_CUR_DIRECTION)
5360
5361#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
5362  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5363                                            (__v16sf)(__m512)(B), \
5364                                            (__v16si)(__m512i)(C), (int)(imm), \
5365                                            (__mmask16)(U), \
5366                                            _MM_FROUND_CUR_DIRECTION)
5367
5368#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
5369  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5370                                             (__v16sf)(__m512)(B), \
5371                                             (__v16si)(__m512i)(C), \
5372                                             (int)(imm), (__mmask16)(U), \
5373                                             (int)(R))
5374
5375#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
5376  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5377                                             (__v16sf)(__m512)(B), \
5378                                             (__v16si)(__m512i)(C), \
5379                                             (int)(imm), (__mmask16)(U), \
5380                                             _MM_FROUND_CUR_DIRECTION)
5381
5382#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
5383  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5384                                          (__v2df)(__m128d)(B), \
5385                                          (__v2di)(__m128i)(C), (int)(imm), \
5386                                          (__mmask8)-1, (int)(R))
5387
5388#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
5389  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5390                                          (__v2df)(__m128d)(B), \
5391                                          (__v2di)(__m128i)(C), (int)(imm), \
5392                                          (__mmask8)(U), (int)(R))
5393
5394#define _mm_fixupimm_sd(A, B, C, imm) \
5395  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5396                                          (__v2df)(__m128d)(B), \
5397                                          (__v2di)(__m128i)(C), (int)(imm), \
5398                                          (__mmask8)-1, \
5399                                          _MM_FROUND_CUR_DIRECTION)
5400
5401#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
5402  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5403                                          (__v2df)(__m128d)(B), \
5404                                          (__v2di)(__m128i)(C), (int)(imm), \
5405                                          (__mmask8)(U), \
5406                                          _MM_FROUND_CUR_DIRECTION)
5407
5408#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
5409  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5410                                           (__v2df)(__m128d)(B), \
5411                                           (__v2di)(__m128i)(C), (int)(imm), \
5412                                           (__mmask8)(U), (int)(R))
5413
5414#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
5415  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5416                                           (__v2df)(__m128d)(B), \
5417                                           (__v2di)(__m128i)(C), (int)(imm), \
5418                                           (__mmask8)(U), \
5419                                           _MM_FROUND_CUR_DIRECTION)
5420
5421#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
5422  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5423                                         (__v4sf)(__m128)(B), \
5424                                         (__v4si)(__m128i)(C), (int)(imm), \
5425                                         (__mmask8)-1, (int)(R))
5426
5427#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
5428  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5429                                         (__v4sf)(__m128)(B), \
5430                                         (__v4si)(__m128i)(C), (int)(imm), \
5431                                         (__mmask8)(U), (int)(R))
5432
5433#define _mm_fixupimm_ss(A, B, C, imm) \
5434  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5435                                         (__v4sf)(__m128)(B), \
5436                                         (__v4si)(__m128i)(C), (int)(imm), \
5437                                         (__mmask8)-1, \
5438                                         _MM_FROUND_CUR_DIRECTION)
5439
5440#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
5441  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5442                                         (__v4sf)(__m128)(B), \
5443                                         (__v4si)(__m128i)(C), (int)(imm), \
5444                                         (__mmask8)(U), \
5445                                         _MM_FROUND_CUR_DIRECTION)
5446
5447#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
5448  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5449                                          (__v4sf)(__m128)(B), \
5450                                          (__v4si)(__m128i)(C), (int)(imm), \
5451                                          (__mmask8)(U), (int)(R))
5452
5453#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
5454  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5455                                          (__v4sf)(__m128)(B), \
5456                                          (__v4si)(__m128i)(C), (int)(imm), \
5457                                          (__mmask8)(U), \
5458                                          _MM_FROUND_CUR_DIRECTION)
5459
5460#define _mm_getexp_round_sd(A, B, R) \
5461  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5462                                                 (__v2df)(__m128d)(B), \
5463                                                 (__v2df)_mm_setzero_pd(), \
5464                                                 (__mmask8)-1, (int)(R))
5465
5466
5467static __inline__ __m128d __DEFAULT_FN_ATTRS128
5468_mm_getexp_sd (__m128d __A, __m128d __B)
5469{
5470  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5471                 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5472}
5473
5474static __inline__ __m128d __DEFAULT_FN_ATTRS128
5475_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5476{
5477 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5478          (__v2df) __B,
5479          (__v2df) __W,
5480          (__mmask8) __U,
5481          _MM_FROUND_CUR_DIRECTION);
5482}
5483
5484#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
5485  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5486                                                 (__v2df)(__m128d)(B), \
5487                                                 (__v2df)(__m128d)(W), \
5488                                                 (__mmask8)(U), (int)(R))
5489
5490static __inline__ __m128d __DEFAULT_FN_ATTRS128
5491_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5492{
5493 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5494          (__v2df) __B,
5495          (__v2df) _mm_setzero_pd (),
5496          (__mmask8) __U,
5497          _MM_FROUND_CUR_DIRECTION);
5498}
5499
5500#define _mm_maskz_getexp_round_sd(U, A, B, R) \
5501  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5502                                                 (__v2df)(__m128d)(B), \
5503                                                 (__v2df)_mm_setzero_pd(), \
5504                                                 (__mmask8)(U), (int)(R))
5505
5506#define _mm_getexp_round_ss(A, B, R) \
5507  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5508                                                (__v4sf)(__m128)(B), \
5509                                                (__v4sf)_mm_setzero_ps(), \
5510                                                (__mmask8)-1, (int)(R))
5511
5512static __inline__ __m128 __DEFAULT_FN_ATTRS128
5513_mm_getexp_ss (__m128 __A, __m128 __B)
5514{
5515  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5516                (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5517}
5518
5519static __inline__ __m128 __DEFAULT_FN_ATTRS128
5520_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5521{
5522 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5523          (__v4sf) __B,
5524          (__v4sf) __W,
5525          (__mmask8) __U,
5526          _MM_FROUND_CUR_DIRECTION);
5527}
5528
5529#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
5530  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5531                                                (__v4sf)(__m128)(B), \
5532                                                (__v4sf)(__m128)(W), \
5533                                                (__mmask8)(U), (int)(R))
5534
5535static __inline__ __m128 __DEFAULT_FN_ATTRS128
5536_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5537{
5538 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5539          (__v4sf) __B,
5540          (__v4sf) _mm_setzero_ps (),
5541          (__mmask8) __U,
5542          _MM_FROUND_CUR_DIRECTION);
5543}
5544
5545#define _mm_maskz_getexp_round_ss(U, A, B, R) \
5546  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5547                                                (__v4sf)(__m128)(B), \
5548                                                (__v4sf)_mm_setzero_ps(), \
5549                                                (__mmask8)(U), (int)(R))
5550
5551#define _mm_getmant_round_sd(A, B, C, D, R) \
5552  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5553                                               (__v2df)(__m128d)(B), \
5554                                               (int)(((D)<<2) | (C)), \
5555                                               (__v2df)_mm_setzero_pd(), \
5556                                               (__mmask8)-1, (int)(R))
5557
5558#define _mm_getmant_sd(A, B, C, D)  \
5559  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5560                                               (__v2df)(__m128d)(B), \
5561                                               (int)(((D)<<2) | (C)), \
5562                                               (__v2df)_mm_setzero_pd(), \
5563                                               (__mmask8)-1, \
5564                                               _MM_FROUND_CUR_DIRECTION)
5565
5566#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
5567  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5568                                               (__v2df)(__m128d)(B), \
5569                                               (int)(((D)<<2) | (C)), \
5570                                               (__v2df)(__m128d)(W), \
5571                                               (__mmask8)(U), \
5572                                               _MM_FROUND_CUR_DIRECTION)
5573
5574#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
5575  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5576                                               (__v2df)(__m128d)(B), \
5577                                               (int)(((D)<<2) | (C)), \
5578                                               (__v2df)(__m128d)(W), \
5579                                               (__mmask8)(U), (int)(R))
5580
5581#define _mm_maskz_getmant_sd(U, A, B, C, D) \
5582  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5583                                               (__v2df)(__m128d)(B), \
5584                                               (int)(((D)<<2) | (C)), \
5585                                               (__v2df)_mm_setzero_pd(), \
5586                                               (__mmask8)(U), \
5587                                               _MM_FROUND_CUR_DIRECTION)
5588
5589#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
5590  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5591                                               (__v2df)(__m128d)(B), \
5592                                               (int)(((D)<<2) | (C)), \
5593                                               (__v2df)_mm_setzero_pd(), \
5594                                               (__mmask8)(U), (int)(R))
5595
5596#define _mm_getmant_round_ss(A, B, C, D, R) \
5597  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5598                                              (__v4sf)(__m128)(B), \
5599                                              (int)(((D)<<2) | (C)), \
5600                                              (__v4sf)_mm_setzero_ps(), \
5601                                              (__mmask8)-1, (int)(R))
5602
5603#define _mm_getmant_ss(A, B, C, D) \
5604  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5605                                              (__v4sf)(__m128)(B), \
5606                                              (int)(((D)<<2) | (C)), \
5607                                              (__v4sf)_mm_setzero_ps(), \
5608                                              (__mmask8)-1, \
5609                                              _MM_FROUND_CUR_DIRECTION)
5610
5611#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
5612  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5613                                              (__v4sf)(__m128)(B), \
5614                                              (int)(((D)<<2) | (C)), \
5615                                              (__v4sf)(__m128)(W), \
5616                                              (__mmask8)(U), \
5617                                              _MM_FROUND_CUR_DIRECTION)
5618
5619#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
5620  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5621                                              (__v4sf)(__m128)(B), \
5622                                              (int)(((D)<<2) | (C)), \
5623                                              (__v4sf)(__m128)(W), \
5624                                              (__mmask8)(U), (int)(R))
5625
5626#define _mm_maskz_getmant_ss(U, A, B, C, D) \
5627  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5628                                              (__v4sf)(__m128)(B), \
5629                                              (int)(((D)<<2) | (C)), \
5630                                              (__v4sf)_mm_setzero_ps(), \
5631                                              (__mmask8)(U), \
5632                                              _MM_FROUND_CUR_DIRECTION)
5633
5634#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
5635  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5636                                              (__v4sf)(__m128)(B), \
5637                                              (int)(((D)<<2) | (C)), \
5638                                              (__v4sf)_mm_setzero_ps(), \
5639                                              (__mmask8)(U), (int)(R))
5640
5641static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5642_mm512_kmov (__mmask16 __A)
5643{
5644  return  __A;
5645}
5646
5647#define _mm_comi_round_sd(A, B, P, R) \
5648  (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
5649                              (int)(P), (int)(R))
5650
5651#define _mm_comi_round_ss(A, B, P, R) \
5652  (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
5653                              (int)(P), (int)(R))
5654
5655#ifdef __x86_64__
5656#define _mm_cvt_roundsd_si64(A, R) \
5657  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))
5658#endif
5659
5660static __inline__ __m512i __DEFAULT_FN_ATTRS512
5661_mm512_sll_epi32(__m512i __A, __m128i __B)
5662{
5663  return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5664}
5665
5666static __inline__ __m512i __DEFAULT_FN_ATTRS512
5667_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5668{
5669  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5670                                          (__v16si)_mm512_sll_epi32(__A, __B),
5671                                          (__v16si)__W);
5672}
5673
5674static __inline__ __m512i __DEFAULT_FN_ATTRS512
5675_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5676{
5677  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5678                                          (__v16si)_mm512_sll_epi32(__A, __B),
5679                                          (__v16si)_mm512_setzero_si512());
5680}
5681
5682static __inline__ __m512i __DEFAULT_FN_ATTRS512
5683_mm512_sll_epi64(__m512i __A, __m128i __B)
5684{
5685  return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5686}
5687
5688static __inline__ __m512i __DEFAULT_FN_ATTRS512
5689_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5690{
5691  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5692                                             (__v8di)_mm512_sll_epi64(__A, __B),
5693                                             (__v8di)__W);
5694}
5695
5696static __inline__ __m512i __DEFAULT_FN_ATTRS512
5697_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5698{
5699  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5700                                           (__v8di)_mm512_sll_epi64(__A, __B),
5701                                           (__v8di)_mm512_setzero_si512());
5702}
5703
5704static __inline__ __m512i __DEFAULT_FN_ATTRS512
5705_mm512_sllv_epi32(__m512i __X, __m512i __Y)
5706{
5707  return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5708}
5709
5710static __inline__ __m512i __DEFAULT_FN_ATTRS512
5711_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5712{
5713  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5714                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
5715                                           (__v16si)__W);
5716}
5717
5718static __inline__ __m512i __DEFAULT_FN_ATTRS512
5719_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5720{
5721  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5722                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
5723                                           (__v16si)_mm512_setzero_si512());
5724}
5725
5726static __inline__ __m512i __DEFAULT_FN_ATTRS512
5727_mm512_sllv_epi64(__m512i __X, __m512i __Y)
5728{
5729  return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5730}
5731
5732static __inline__ __m512i __DEFAULT_FN_ATTRS512
5733_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5734{
5735  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5736                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
5737                                            (__v8di)__W);
5738}
5739
5740static __inline__ __m512i __DEFAULT_FN_ATTRS512
5741_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5742{
5743  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5744                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
5745                                            (__v8di)_mm512_setzero_si512());
5746}
5747
5748static __inline__ __m512i __DEFAULT_FN_ATTRS512
5749_mm512_sra_epi32(__m512i __A, __m128i __B)
5750{
5751  return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5752}
5753
5754static __inline__ __m512i __DEFAULT_FN_ATTRS512
5755_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5756{
5757  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5758                                          (__v16si)_mm512_sra_epi32(__A, __B),
5759                                          (__v16si)__W);
5760}
5761
5762static __inline__ __m512i __DEFAULT_FN_ATTRS512
5763_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5764{
5765  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5766                                          (__v16si)_mm512_sra_epi32(__A, __B),
5767                                          (__v16si)_mm512_setzero_si512());
5768}
5769
5770static __inline__ __m512i __DEFAULT_FN_ATTRS512
5771_mm512_sra_epi64(__m512i __A, __m128i __B)
5772{
5773  return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5774}
5775
5776static __inline__ __m512i __DEFAULT_FN_ATTRS512
5777_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5778{
5779  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5780                                           (__v8di)_mm512_sra_epi64(__A, __B),
5781                                           (__v8di)__W);
5782}
5783
5784static __inline__ __m512i __DEFAULT_FN_ATTRS512
5785_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5786{
5787  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5788                                           (__v8di)_mm512_sra_epi64(__A, __B),
5789                                           (__v8di)_mm512_setzero_si512());
5790}
5791
5792static __inline__ __m512i __DEFAULT_FN_ATTRS512
5793_mm512_srav_epi32(__m512i __X, __m512i __Y)
5794{
5795  return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
5796}
5797
5798static __inline__ __m512i __DEFAULT_FN_ATTRS512
5799_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5800{
5801  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5802                                           (__v16si)_mm512_srav_epi32(__X, __Y),
5803                                           (__v16si)__W);
5804}
5805
5806static __inline__ __m512i __DEFAULT_FN_ATTRS512
5807_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5808{
5809  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5810                                           (__v16si)_mm512_srav_epi32(__X, __Y),
5811                                           (__v16si)_mm512_setzero_si512());
5812}
5813
5814static __inline__ __m512i __DEFAULT_FN_ATTRS512
5815_mm512_srav_epi64(__m512i __X, __m512i __Y)
5816{
5817  return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
5818}
5819
5820static __inline__ __m512i __DEFAULT_FN_ATTRS512
5821_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5822{
5823  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5824                                            (__v8di)_mm512_srav_epi64(__X, __Y),
5825                                            (__v8di)__W);
5826}
5827
5828static __inline__ __m512i __DEFAULT_FN_ATTRS512
5829_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5830{
5831  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5832                                            (__v8di)_mm512_srav_epi64(__X, __Y),
5833                                            (__v8di)_mm512_setzero_si512());
5834}
5835
5836static __inline__ __m512i __DEFAULT_FN_ATTRS512
5837_mm512_srl_epi32(__m512i __A, __m128i __B)
5838{
5839  return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5840}
5841
5842static __inline__ __m512i __DEFAULT_FN_ATTRS512
5843_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5844{
5845  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5846                                          (__v16si)_mm512_srl_epi32(__A, __B),
5847                                          (__v16si)__W);
5848}
5849
5850static __inline__ __m512i __DEFAULT_FN_ATTRS512
5851_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5852{
5853  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5854                                          (__v16si)_mm512_srl_epi32(__A, __B),
5855                                          (__v16si)_mm512_setzero_si512());
5856}
5857
5858static __inline__ __m512i __DEFAULT_FN_ATTRS512
5859_mm512_srl_epi64(__m512i __A, __m128i __B)
5860{
5861  return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5862}
5863
5864static __inline__ __m512i __DEFAULT_FN_ATTRS512
5865_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5866{
5867  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5868                                           (__v8di)_mm512_srl_epi64(__A, __B),
5869                                           (__v8di)__W);
5870}
5871
5872static __inline__ __m512i __DEFAULT_FN_ATTRS512
5873_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5874{
5875  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5876                                           (__v8di)_mm512_srl_epi64(__A, __B),
5877                                           (__v8di)_mm512_setzero_si512());
5878}
5879
5880static __inline__ __m512i __DEFAULT_FN_ATTRS512
5881_mm512_srlv_epi32(__m512i __X, __m512i __Y)
5882{
5883  return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
5884}
5885
5886static __inline__ __m512i __DEFAULT_FN_ATTRS512
5887_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5888{
5889  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5890                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
5891                                           (__v16si)__W);
5892}
5893
5894static __inline__ __m512i __DEFAULT_FN_ATTRS512
5895_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5896{
5897  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5898                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
5899                                           (__v16si)_mm512_setzero_si512());
5900}
5901
5902static __inline__ __m512i __DEFAULT_FN_ATTRS512
5903_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
5904{
5905  return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
5906}
5907
5908static __inline__ __m512i __DEFAULT_FN_ATTRS512
5909_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5910{
5911  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5912                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
5913                                            (__v8di)__W);
5914}
5915
5916static __inline__ __m512i __DEFAULT_FN_ATTRS512
5917_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5918{
5919  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5920                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
5921                                            (__v8di)_mm512_setzero_si512());
5922}
5923
5924#define _mm512_ternarylogic_epi32(A, B, C, imm) \
5925  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
5926                                            (__v16si)(__m512i)(B), \
5927                                            (__v16si)(__m512i)(C), (int)(imm), \
5928                                            (__mmask16)-1)
5929
5930#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
5931  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
5932                                            (__v16si)(__m512i)(B), \
5933                                            (__v16si)(__m512i)(C), (int)(imm), \
5934                                            (__mmask16)(U))
5935
5936#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
5937  (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
5938                                             (__v16si)(__m512i)(B), \
5939                                             (__v16si)(__m512i)(C), \
5940                                             (int)(imm), (__mmask16)(U))
5941
5942#define _mm512_ternarylogic_epi64(A, B, C, imm) \
5943  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
5944                                            (__v8di)(__m512i)(B), \
5945                                            (__v8di)(__m512i)(C), (int)(imm), \
5946                                            (__mmask8)-1)
5947
5948#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
5949  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
5950                                            (__v8di)(__m512i)(B), \
5951                                            (__v8di)(__m512i)(C), (int)(imm), \
5952                                            (__mmask8)(U))
5953
5954#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
5955  (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
5956                                             (__v8di)(__m512i)(B), \
5957                                             (__v8di)(__m512i)(C), (int)(imm), \
5958                                             (__mmask8)(U))
5959
5960#ifdef __x86_64__
5961#define _mm_cvt_roundsd_i64(A, R) \
5962  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))
5963#endif
5964
5965#define _mm_cvt_roundsd_si32(A, R) \
5966  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))
5967
5968#define _mm_cvt_roundsd_i32(A, R) \
5969  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))
5970
5971#define _mm_cvt_roundsd_u32(A, R) \
5972  (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R))
5973
5974static __inline__ unsigned __DEFAULT_FN_ATTRS128
5975_mm_cvtsd_u32 (__m128d __A)
5976{
5977  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
5978             _MM_FROUND_CUR_DIRECTION);
5979}
5980
5981#ifdef __x86_64__
5982#define _mm_cvt_roundsd_u64(A, R) \
5983  (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
5984                                                  (int)(R))
5985
5986static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5987_mm_cvtsd_u64 (__m128d __A)
5988{
5989  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
5990                 __A,
5991                 _MM_FROUND_CUR_DIRECTION);
5992}
5993#endif
5994
5995#define _mm_cvt_roundss_si32(A, R) \
5996  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))
5997
5998#define _mm_cvt_roundss_i32(A, R) \
5999  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))
6000
6001#ifdef __x86_64__
6002#define _mm_cvt_roundss_si64(A, R) \
6003  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))
6004
6005#define _mm_cvt_roundss_i64(A, R) \
6006  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))
6007#endif
6008
6009#define _mm_cvt_roundss_u32(A, R) \
6010  (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R))
6011
6012static __inline__ unsigned __DEFAULT_FN_ATTRS128
6013_mm_cvtss_u32 (__m128 __A)
6014{
6015  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6016             _MM_FROUND_CUR_DIRECTION);
6017}
6018
6019#ifdef __x86_64__
6020#define _mm_cvt_roundss_u64(A, R) \
6021  (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
6022                                                  (int)(R))
6023
6024static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6025_mm_cvtss_u64 (__m128 __A)
6026{
6027  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6028                 __A,
6029                 _MM_FROUND_CUR_DIRECTION);
6030}
6031#endif
6032
6033#define _mm_cvtt_roundsd_i32(A, R) \
6034  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))
6035
6036#define _mm_cvtt_roundsd_si32(A, R) \
6037  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))
6038
6039static __inline__ int __DEFAULT_FN_ATTRS128
6040_mm_cvttsd_i32 (__m128d __A)
6041{
6042  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6043              _MM_FROUND_CUR_DIRECTION);
6044}
6045
6046#ifdef __x86_64__
6047#define _mm_cvtt_roundsd_si64(A, R) \
6048  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
6049
6050#define _mm_cvtt_roundsd_i64(A, R) \
6051  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
6052
6053static __inline__ long long __DEFAULT_FN_ATTRS128
6054_mm_cvttsd_i64 (__m128d __A)
6055{
6056  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6057              _MM_FROUND_CUR_DIRECTION);
6058}
6059#endif
6060
6061#define _mm_cvtt_roundsd_u32(A, R) \
6062  (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R))
6063
6064static __inline__ unsigned __DEFAULT_FN_ATTRS128
6065_mm_cvttsd_u32 (__m128d __A)
6066{
6067  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6068              _MM_FROUND_CUR_DIRECTION);
6069}
6070
6071#ifdef __x86_64__
6072#define _mm_cvtt_roundsd_u64(A, R) \
6073  (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
6074                                                   (int)(R))
6075
6076static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6077_mm_cvttsd_u64 (__m128d __A)
6078{
6079  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6080                  __A,
6081                  _MM_FROUND_CUR_DIRECTION);
6082}
6083#endif
6084
6085#define _mm_cvtt_roundss_i32(A, R) \
6086  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))
6087
6088#define _mm_cvtt_roundss_si32(A, R) \
6089  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))
6090
6091static __inline__ int __DEFAULT_FN_ATTRS128
6092_mm_cvttss_i32 (__m128 __A)
6093{
6094  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6095              _MM_FROUND_CUR_DIRECTION);
6096}
6097
6098#ifdef __x86_64__
6099#define _mm_cvtt_roundss_i64(A, R) \
6100  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
6101
6102#define _mm_cvtt_roundss_si64(A, R) \
6103  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
6104
6105static __inline__ long long __DEFAULT_FN_ATTRS128
6106_mm_cvttss_i64 (__m128 __A)
6107{
6108  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6109              _MM_FROUND_CUR_DIRECTION);
6110}
6111#endif
6112
6113#define _mm_cvtt_roundss_u32(A, R) \
6114  (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R))
6115
6116static __inline__ unsigned __DEFAULT_FN_ATTRS128
6117_mm_cvttss_u32 (__m128 __A)
6118{
6119  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6120              _MM_FROUND_CUR_DIRECTION);
6121}
6122
6123#ifdef __x86_64__
6124#define _mm_cvtt_roundss_u64(A, R) \
6125  (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
6126                                                   (int)(R))
6127
6128static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6129_mm_cvttss_u64 (__m128 __A)
6130{
6131  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6132                  __A,
6133                  _MM_FROUND_CUR_DIRECTION);
6134}
6135#endif
6136
6137#define _mm512_permute_pd(X, C) \
6138  (__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C))
6139
6140#define _mm512_mask_permute_pd(W, U, X, C) \
6141  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6142                                       (__v8df)_mm512_permute_pd((X), (C)), \
6143                                       (__v8df)(__m512d)(W))
6144
6145#define _mm512_maskz_permute_pd(U, X, C) \
6146  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6147                                       (__v8df)_mm512_permute_pd((X), (C)), \
6148                                       (__v8df)_mm512_setzero_pd())
6149
6150#define _mm512_permute_ps(X, C) \
6151  (__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C))
6152
6153#define _mm512_mask_permute_ps(W, U, X, C) \
6154  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6155                                      (__v16sf)_mm512_permute_ps((X), (C)), \
6156                                      (__v16sf)(__m512)(W))
6157
6158#define _mm512_maskz_permute_ps(U, X, C) \
6159  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6160                                      (__v16sf)_mm512_permute_ps((X), (C)), \
6161                                      (__v16sf)_mm512_setzero_ps())
6162
6163static __inline__ __m512d __DEFAULT_FN_ATTRS512
6164_mm512_permutevar_pd(__m512d __A, __m512i __C)
6165{
6166  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
6167}
6168
6169static __inline__ __m512d __DEFAULT_FN_ATTRS512
6170_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6171{
6172  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6173                                         (__v8df)_mm512_permutevar_pd(__A, __C),
6174                                         (__v8df)__W);
6175}
6176
6177static __inline__ __m512d __DEFAULT_FN_ATTRS512
6178_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
6179{
6180  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6181                                         (__v8df)_mm512_permutevar_pd(__A, __C),
6182                                         (__v8df)_mm512_setzero_pd());
6183}
6184
6185static __inline__ __m512 __DEFAULT_FN_ATTRS512
6186_mm512_permutevar_ps(__m512 __A, __m512i __C)
6187{
6188  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
6189}
6190
6191static __inline__ __m512 __DEFAULT_FN_ATTRS512
6192_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6193{
6194  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6195                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
6196                                        (__v16sf)__W);
6197}
6198
6199static __inline__ __m512 __DEFAULT_FN_ATTRS512
6200_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
6201{
6202  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6203                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
6204                                        (__v16sf)_mm512_setzero_ps());
6205}
6206
6207static __inline __m512d __DEFAULT_FN_ATTRS512
6208_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
6209{
6210  return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
6211                                                 (__v8df)__B);
6212}
6213
6214static __inline__ __m512d __DEFAULT_FN_ATTRS512
6215_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
6216{
6217  return (__m512d)__builtin_ia32_selectpd_512(__U,
6218                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6219                                  (__v8df)__A);
6220}
6221
6222static __inline__ __m512d __DEFAULT_FN_ATTRS512
6223_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
6224                             __m512d __B)
6225{
6226  return (__m512d)__builtin_ia32_selectpd_512(__U,
6227                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6228                                  (__v8df)(__m512d)__I);
6229}
6230
6231static __inline__ __m512d __DEFAULT_FN_ATTRS512
6232_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
6233                             __m512d __B)
6234{
6235  return (__m512d)__builtin_ia32_selectpd_512(__U,
6236                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6237                                  (__v8df)_mm512_setzero_pd());
6238}
6239
6240static __inline __m512 __DEFAULT_FN_ATTRS512
6241_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
6242{
6243  return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
6244                                                (__v16sf) __B);
6245}
6246
6247static __inline__ __m512 __DEFAULT_FN_ATTRS512
6248_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6249{
6250  return (__m512)__builtin_ia32_selectps_512(__U,
6251                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6252                                 (__v16sf)__A);
6253}
6254
6255static __inline__ __m512 __DEFAULT_FN_ATTRS512
6256_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
6257{
6258  return (__m512)__builtin_ia32_selectps_512(__U,
6259                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6260                                 (__v16sf)(__m512)__I);
6261}
6262
6263static __inline__ __m512 __DEFAULT_FN_ATTRS512
6264_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
6265{
6266  return (__m512)__builtin_ia32_selectps_512(__U,
6267                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6268                                 (__v16sf)_mm512_setzero_ps());
6269}
6270
6271
6272#define _mm512_cvtt_roundpd_epu32(A, R) \
6273  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6274                                             (__v8si)_mm256_undefined_si256(), \
6275                                             (__mmask8)-1, (int)(R))
6276
6277#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
6278  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6279                                             (__v8si)(__m256i)(W), \
6280                                             (__mmask8)(U), (int)(R))
6281
6282#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
6283  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6284                                             (__v8si)_mm256_setzero_si256(), \
6285                                             (__mmask8)(U), (int)(R))
6286
6287static __inline__ __m256i __DEFAULT_FN_ATTRS512
6288_mm512_cvttpd_epu32 (__m512d __A)
6289{
6290  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6291                  (__v8si)
6292                  _mm256_undefined_si256 (),
6293                  (__mmask8) -1,
6294                  _MM_FROUND_CUR_DIRECTION);
6295}
6296
6297static __inline__ __m256i __DEFAULT_FN_ATTRS512
6298_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6299{
6300  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6301                  (__v8si) __W,
6302                  (__mmask8) __U,
6303                  _MM_FROUND_CUR_DIRECTION);
6304}
6305
6306static __inline__ __m256i __DEFAULT_FN_ATTRS512
6307_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
6308{
6309  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6310                  (__v8si)
6311                  _mm256_setzero_si256 (),
6312                  (__mmask8) __U,
6313                  _MM_FROUND_CUR_DIRECTION);
6314}
6315
6316#define _mm_roundscale_round_sd(A, B, imm, R) \
6317  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6318                                                (__v2df)(__m128d)(B), \
6319                                                (__v2df)_mm_setzero_pd(), \
6320                                                (__mmask8)-1, (int)(imm), \
6321                                                (int)(R))
6322
6323#define _mm_roundscale_sd(A, B, imm) \
6324  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6325                                                (__v2df)(__m128d)(B), \
6326                                                (__v2df)_mm_setzero_pd(), \
6327                                                (__mmask8)-1, (int)(imm), \
6328                                                _MM_FROUND_CUR_DIRECTION)
6329
6330#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
6331  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6332                                                (__v2df)(__m128d)(B), \
6333                                                (__v2df)(__m128d)(W), \
6334                                                (__mmask8)(U), (int)(imm), \
6335                                                _MM_FROUND_CUR_DIRECTION)
6336
6337#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
6338  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6339                                                (__v2df)(__m128d)(B), \
6340                                                (__v2df)(__m128d)(W), \
6341                                                (__mmask8)(U), (int)(I), \
6342                                                (int)(R))
6343
6344#define _mm_maskz_roundscale_sd(U, A, B, I) \
6345  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6346                                                (__v2df)(__m128d)(B), \
6347                                                (__v2df)_mm_setzero_pd(), \
6348                                                (__mmask8)(U), (int)(I), \
6349                                                _MM_FROUND_CUR_DIRECTION)
6350
6351#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
6352  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6353                                                (__v2df)(__m128d)(B), \
6354                                                (__v2df)_mm_setzero_pd(), \
6355                                                (__mmask8)(U), (int)(I), \
6356                                                (int)(R))
6357
6358#define _mm_roundscale_round_ss(A, B, imm, R) \
6359  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6360                                               (__v4sf)(__m128)(B), \
6361                                               (__v4sf)_mm_setzero_ps(), \
6362                                               (__mmask8)-1, (int)(imm), \
6363                                               (int)(R))
6364
6365#define _mm_roundscale_ss(A, B, imm) \
6366  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6367                                               (__v4sf)(__m128)(B), \
6368                                               (__v4sf)_mm_setzero_ps(), \
6369                                               (__mmask8)-1, (int)(imm), \
6370                                               _MM_FROUND_CUR_DIRECTION)
6371
6372#define _mm_mask_roundscale_ss(W, U, A, B, I) \
6373  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6374                                               (__v4sf)(__m128)(B), \
6375                                               (__v4sf)(__m128)(W), \
6376                                               (__mmask8)(U), (int)(I), \
6377                                               _MM_FROUND_CUR_DIRECTION)
6378
6379#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
6380  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6381                                               (__v4sf)(__m128)(B), \
6382                                               (__v4sf)(__m128)(W), \
6383                                               (__mmask8)(U), (int)(I), \
6384                                               (int)(R))
6385
6386#define _mm_maskz_roundscale_ss(U, A, B, I) \
6387  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6388                                               (__v4sf)(__m128)(B), \
6389                                               (__v4sf)_mm_setzero_ps(), \
6390                                               (__mmask8)(U), (int)(I), \
6391                                               _MM_FROUND_CUR_DIRECTION)
6392
6393#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
6394  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6395                                               (__v4sf)(__m128)(B), \
6396                                               (__v4sf)_mm_setzero_ps(), \
6397                                               (__mmask8)(U), (int)(I), \
6398                                               (int)(R))
6399
6400#define _mm512_scalef_round_pd(A, B, R) \
6401  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6402                                           (__v8df)(__m512d)(B), \
6403                                           (__v8df)_mm512_undefined_pd(), \
6404                                           (__mmask8)-1, (int)(R))
6405
6406#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
6407  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6408                                           (__v8df)(__m512d)(B), \
6409                                           (__v8df)(__m512d)(W), \
6410                                           (__mmask8)(U), (int)(R))
6411
6412#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
6413  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6414                                           (__v8df)(__m512d)(B), \
6415                                           (__v8df)_mm512_setzero_pd(), \
6416                                           (__mmask8)(U), (int)(R))
6417
6418static __inline__ __m512d __DEFAULT_FN_ATTRS512
6419_mm512_scalef_pd (__m512d __A, __m512d __B)
6420{
6421  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6422                (__v8df) __B,
6423                (__v8df)
6424                _mm512_undefined_pd (),
6425                (__mmask8) -1,
6426                _MM_FROUND_CUR_DIRECTION);
6427}
6428
6429static __inline__ __m512d __DEFAULT_FN_ATTRS512
6430_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6431{
6432  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6433                (__v8df) __B,
6434                (__v8df) __W,
6435                (__mmask8) __U,
6436                _MM_FROUND_CUR_DIRECTION);
6437}
6438
6439static __inline__ __m512d __DEFAULT_FN_ATTRS512
6440_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6441{
6442  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6443                (__v8df) __B,
6444                (__v8df)
6445                _mm512_setzero_pd (),
6446                (__mmask8) __U,
6447                _MM_FROUND_CUR_DIRECTION);
6448}
6449
6450#define _mm512_scalef_round_ps(A, B, R) \
6451  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6452                                          (__v16sf)(__m512)(B), \
6453                                          (__v16sf)_mm512_undefined_ps(), \
6454                                          (__mmask16)-1, (int)(R))
6455
6456#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
6457  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6458                                          (__v16sf)(__m512)(B), \
6459                                          (__v16sf)(__m512)(W), \
6460                                          (__mmask16)(U), (int)(R))
6461
6462#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
6463  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6464                                          (__v16sf)(__m512)(B), \
6465                                          (__v16sf)_mm512_setzero_ps(), \
6466                                          (__mmask16)(U), (int)(R))
6467
6468static __inline__ __m512 __DEFAULT_FN_ATTRS512
6469_mm512_scalef_ps (__m512 __A, __m512 __B)
6470{
6471  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6472               (__v16sf) __B,
6473               (__v16sf)
6474               _mm512_undefined_ps (),
6475               (__mmask16) -1,
6476               _MM_FROUND_CUR_DIRECTION);
6477}
6478
6479static __inline__ __m512 __DEFAULT_FN_ATTRS512
6480_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6481{
6482  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6483               (__v16sf) __B,
6484               (__v16sf) __W,
6485               (__mmask16) __U,
6486               _MM_FROUND_CUR_DIRECTION);
6487}
6488
6489static __inline__ __m512 __DEFAULT_FN_ATTRS512
6490_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6491{
6492  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6493               (__v16sf) __B,
6494               (__v16sf)
6495               _mm512_setzero_ps (),
6496               (__mmask16) __U,
6497               _MM_FROUND_CUR_DIRECTION);
6498}
6499
6500#define _mm_scalef_round_sd(A, B, R) \
6501  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6502                                              (__v2df)(__m128d)(B), \
6503                                              (__v2df)_mm_setzero_pd(), \
6504                                              (__mmask8)-1, (int)(R))
6505
6506static __inline__ __m128d __DEFAULT_FN_ATTRS128
6507_mm_scalef_sd (__m128d __A, __m128d __B)
6508{
6509  return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6510              (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6511              (__mmask8) -1,
6512              _MM_FROUND_CUR_DIRECTION);
6513}
6514
6515static __inline__ __m128d __DEFAULT_FN_ATTRS128
6516_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6517{
6518 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6519                 (__v2df) __B,
6520                (__v2df) __W,
6521                (__mmask8) __U,
6522                _MM_FROUND_CUR_DIRECTION);
6523}
6524
6525#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
6526  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6527                                              (__v2df)(__m128d)(B), \
6528                                              (__v2df)(__m128d)(W), \
6529                                              (__mmask8)(U), (int)(R))
6530
6531static __inline__ __m128d __DEFAULT_FN_ATTRS128
6532_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6533{
6534 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6535                 (__v2df) __B,
6536                (__v2df) _mm_setzero_pd (),
6537                (__mmask8) __U,
6538                _MM_FROUND_CUR_DIRECTION);
6539}
6540
6541#define _mm_maskz_scalef_round_sd(U, A, B, R) \
6542  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6543                                              (__v2df)(__m128d)(B), \
6544                                              (__v2df)_mm_setzero_pd(), \
6545                                              (__mmask8)(U), (int)(R))
6546
6547#define _mm_scalef_round_ss(A, B, R) \
6548  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6549                                             (__v4sf)(__m128)(B), \
6550                                             (__v4sf)_mm_setzero_ps(), \
6551                                             (__mmask8)-1, (int)(R))
6552
6553static __inline__ __m128 __DEFAULT_FN_ATTRS128
6554_mm_scalef_ss (__m128 __A, __m128 __B)
6555{
6556  return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6557             (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6558             (__mmask8) -1,
6559             _MM_FROUND_CUR_DIRECTION);
6560}
6561
6562static __inline__ __m128 __DEFAULT_FN_ATTRS128
6563_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6564{
6565 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6566                (__v4sf) __B,
6567                (__v4sf) __W,
6568                (__mmask8) __U,
6569                _MM_FROUND_CUR_DIRECTION);
6570}
6571
6572#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
6573  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6574                                             (__v4sf)(__m128)(B), \
6575                                             (__v4sf)(__m128)(W), \
6576                                             (__mmask8)(U), (int)(R))
6577
6578static __inline__ __m128 __DEFAULT_FN_ATTRS128
6579_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6580{
6581 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6582                 (__v4sf) __B,
6583                (__v4sf) _mm_setzero_ps (),
6584                (__mmask8) __U,
6585                _MM_FROUND_CUR_DIRECTION);
6586}
6587
6588#define _mm_maskz_scalef_round_ss(U, A, B, R) \
6589  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6590                                             (__v4sf)(__m128)(B), \
6591                                             (__v4sf)_mm_setzero_ps(), \
6592                                             (__mmask8)(U), \
6593                                             (int)(R))
6594
6595static __inline__ __m512i __DEFAULT_FN_ATTRS512
6596_mm512_srai_epi32(__m512i __A, int __B)
6597{
6598  return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
6599}
6600
6601static __inline__ __m512i __DEFAULT_FN_ATTRS512
6602_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
6603{
6604  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6605                                         (__v16si)_mm512_srai_epi32(__A, __B),
6606                                         (__v16si)__W);
6607}
6608
6609static __inline__ __m512i __DEFAULT_FN_ATTRS512
6610_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
6611  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6612                                         (__v16si)_mm512_srai_epi32(__A, __B),
6613                                         (__v16si)_mm512_setzero_si512());
6614}
6615
6616static __inline__ __m512i __DEFAULT_FN_ATTRS512
6617_mm512_srai_epi64(__m512i __A, int __B)
6618{
6619  return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
6620}
6621
6622static __inline__ __m512i __DEFAULT_FN_ATTRS512
6623_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
6624{
6625  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6626                                          (__v8di)_mm512_srai_epi64(__A, __B),
6627                                          (__v8di)__W);
6628}
6629
6630static __inline__ __m512i __DEFAULT_FN_ATTRS512
6631_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
6632{
6633  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6634                                          (__v8di)_mm512_srai_epi64(__A, __B),
6635                                          (__v8di)_mm512_setzero_si512());
6636}
6637
6638#define _mm512_shuffle_f32x4(A, B, imm) \
6639  (__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
6640                                    (__v16sf)(__m512)(B), (int)(imm))
6641
6642#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
6643  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6644                                      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
6645                                      (__v16sf)(__m512)(W))
6646
6647#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
6648  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6649                                      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
6650                                      (__v16sf)_mm512_setzero_ps())
6651
6652#define _mm512_shuffle_f64x2(A, B, imm) \
6653  (__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
6654                                     (__v8df)(__m512d)(B), (int)(imm))
6655
6656#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
6657  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6658                                       (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
6659                                       (__v8df)(__m512d)(W))
6660
6661#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
6662  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6663                                       (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
6664                                       (__v8df)_mm512_setzero_pd())
6665
6666#define _mm512_shuffle_i32x4(A, B, imm) \
6667  (__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
6668                                     (__v16si)(__m512i)(B), (int)(imm))
6669
6670#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
6671  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
6672                                      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
6673                                      (__v16si)(__m512i)(W))
6674
6675#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
6676  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
6677                                      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
6678                                      (__v16si)_mm512_setzero_si512())
6679
6680#define _mm512_shuffle_i64x2(A, B, imm) \
6681  (__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
6682                                     (__v8di)(__m512i)(B), (int)(imm))
6683
6684#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
6685  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
6686                                      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
6687                                      (__v8di)(__m512i)(W))
6688
6689#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
6690  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
6691                                      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
6692                                      (__v8di)_mm512_setzero_si512())
6693
6694#define _mm512_shuffle_pd(A, B, M) \
6695  (__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
6696                                    (__v8df)(__m512d)(B), (int)(M))
6697
6698#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
6699  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6700                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
6701                                       (__v8df)(__m512d)(W))
6702
6703#define _mm512_maskz_shuffle_pd(U, A, B, M) \
6704  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6705                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
6706                                       (__v8df)_mm512_setzero_pd())
6707
6708#define _mm512_shuffle_ps(A, B, M) \
6709  (__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
6710                                   (__v16sf)(__m512)(B), (int)(M))
6711
6712#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
6713  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6714                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
6715                                      (__v16sf)(__m512)(W))
6716
6717#define _mm512_maskz_shuffle_ps(U, A, B, M) \
6718  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6719                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
6720                                      (__v16sf)_mm512_setzero_ps())
6721
6722#define _mm_sqrt_round_sd(A, B, R) \
6723  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6724                                            (__v2df)(__m128d)(B), \
6725                                            (__v2df)_mm_setzero_pd(), \
6726                                            (__mmask8)-1, (int)(R))
6727
6728static __inline__ __m128d __DEFAULT_FN_ATTRS128
6729_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6730{
6731 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6732                 (__v2df) __B,
6733                (__v2df) __W,
6734                (__mmask8) __U,
6735                _MM_FROUND_CUR_DIRECTION);
6736}
6737
6738#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
6739  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6740                                            (__v2df)(__m128d)(B), \
6741                                            (__v2df)(__m128d)(W), \
6742                                            (__mmask8)(U), (int)(R))
6743
6744static __inline__ __m128d __DEFAULT_FN_ATTRS128
6745_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
6746{
6747 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6748                 (__v2df) __B,
6749                (__v2df) _mm_setzero_pd (),
6750                (__mmask8) __U,
6751                _MM_FROUND_CUR_DIRECTION);
6752}
6753
6754#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
6755  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6756                                            (__v2df)(__m128d)(B), \
6757                                            (__v2df)_mm_setzero_pd(), \
6758                                            (__mmask8)(U), (int)(R))
6759
6760#define _mm_sqrt_round_ss(A, B, R) \
6761  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6762                                           (__v4sf)(__m128)(B), \
6763                                           (__v4sf)_mm_setzero_ps(), \
6764                                           (__mmask8)-1, (int)(R))
6765
6766static __inline__ __m128 __DEFAULT_FN_ATTRS128
6767_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6768{
6769 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6770                 (__v4sf) __B,
6771                (__v4sf) __W,
6772                (__mmask8) __U,
6773                _MM_FROUND_CUR_DIRECTION);
6774}
6775
6776#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
6777  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6778                                           (__v4sf)(__m128)(B), \
6779                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
6780                                           (int)(R))
6781
6782static __inline__ __m128 __DEFAULT_FN_ATTRS128
6783_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
6784{
6785 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6786                 (__v4sf) __B,
6787                (__v4sf) _mm_setzero_ps (),
6788                (__mmask8) __U,
6789                _MM_FROUND_CUR_DIRECTION);
6790}
6791
6792#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
6793  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6794                                           (__v4sf)(__m128)(B), \
6795                                           (__v4sf)_mm_setzero_ps(), \
6796                                           (__mmask8)(U), (int)(R))
6797
6798static __inline__ __m512 __DEFAULT_FN_ATTRS512
6799_mm512_broadcast_f32x4(__m128 __A)
6800{
6801  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6802                                         0, 1, 2, 3, 0, 1, 2, 3,
6803                                         0, 1, 2, 3, 0, 1, 2, 3);
6804}
6805
6806static __inline__ __m512 __DEFAULT_FN_ATTRS512
6807_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
6808{
6809  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6810                                           (__v16sf)_mm512_broadcast_f32x4(__A),
6811                                           (__v16sf)__O);
6812}
6813
6814static __inline__ __m512 __DEFAULT_FN_ATTRS512
6815_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
6816{
6817  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6818                                           (__v16sf)_mm512_broadcast_f32x4(__A),
6819                                           (__v16sf)_mm512_setzero_ps());
6820}
6821
6822static __inline__ __m512d __DEFAULT_FN_ATTRS512
6823_mm512_broadcast_f64x4(__m256d __A)
6824{
6825  return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6826                                          0, 1, 2, 3, 0, 1, 2, 3);
6827}
6828
6829static __inline__ __m512d __DEFAULT_FN_ATTRS512
6830_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
6831{
6832  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6833                                            (__v8df)_mm512_broadcast_f64x4(__A),
6834                                            (__v8df)__O);
6835}
6836
6837static __inline__ __m512d __DEFAULT_FN_ATTRS512
6838_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
6839{
6840  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6841                                            (__v8df)_mm512_broadcast_f64x4(__A),
6842                                            (__v8df)_mm512_setzero_pd());
6843}
6844
6845static __inline__ __m512i __DEFAULT_FN_ATTRS512
6846_mm512_broadcast_i32x4(__m128i __A)
6847{
6848  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6849                                          0, 1, 2, 3, 0, 1, 2, 3,
6850                                          0, 1, 2, 3, 0, 1, 2, 3);
6851}
6852
6853static __inline__ __m512i __DEFAULT_FN_ATTRS512
6854_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
6855{
6856  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6857                                           (__v16si)_mm512_broadcast_i32x4(__A),
6858                                           (__v16si)__O);
6859}
6860
6861static __inline__ __m512i __DEFAULT_FN_ATTRS512
6862_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
6863{
6864  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6865                                           (__v16si)_mm512_broadcast_i32x4(__A),
6866                                           (__v16si)_mm512_setzero_si512());
6867}
6868
6869static __inline__ __m512i __DEFAULT_FN_ATTRS512
6870_mm512_broadcast_i64x4(__m256i __A)
6871{
6872  return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6873                                          0, 1, 2, 3, 0, 1, 2, 3);
6874}
6875
6876static __inline__ __m512i __DEFAULT_FN_ATTRS512
6877_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
6878{
6879  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6880                                            (__v8di)_mm512_broadcast_i64x4(__A),
6881                                            (__v8di)__O);
6882}
6883
6884static __inline__ __m512i __DEFAULT_FN_ATTRS512
6885_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
6886{
6887  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6888                                            (__v8di)_mm512_broadcast_i64x4(__A),
6889                                            (__v8di)_mm512_setzero_si512());
6890}
6891
6892static __inline__ __m512d __DEFAULT_FN_ATTRS512
6893_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
6894{
6895  return (__m512d)__builtin_ia32_selectpd_512(__M,
6896                                              (__v8df) _mm512_broadcastsd_pd(__A),
6897                                              (__v8df) __O);
6898}
6899
6900static __inline__ __m512d __DEFAULT_FN_ATTRS512
6901_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
6902{
6903  return (__m512d)__builtin_ia32_selectpd_512(__M,
6904                                              (__v8df) _mm512_broadcastsd_pd(__A),
6905                                              (__v8df) _mm512_setzero_pd());
6906}
6907
6908static __inline__ __m512 __DEFAULT_FN_ATTRS512
6909_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
6910{
6911  return (__m512)__builtin_ia32_selectps_512(__M,
6912                                             (__v16sf) _mm512_broadcastss_ps(__A),
6913                                             (__v16sf) __O);
6914}
6915
6916static __inline__ __m512 __DEFAULT_FN_ATTRS512
6917_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
6918{
6919  return (__m512)__builtin_ia32_selectps_512(__M,
6920                                             (__v16sf) _mm512_broadcastss_ps(__A),
6921                                             (__v16sf) _mm512_setzero_ps());
6922}
6923
6924static __inline__ __m128i __DEFAULT_FN_ATTRS512
6925_mm512_cvtsepi32_epi8 (__m512i __A)
6926{
6927  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6928               (__v16qi) _mm_undefined_si128 (),
6929               (__mmask16) -1);
6930}
6931
6932static __inline__ __m128i __DEFAULT_FN_ATTRS512
6933_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6934{
6935  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6936               (__v16qi) __O, __M);
6937}
6938
6939static __inline__ __m128i __DEFAULT_FN_ATTRS512
6940_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
6941{
6942  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6943               (__v16qi) _mm_setzero_si128 (),
6944               __M);
6945}
6946
6947static __inline__ void __DEFAULT_FN_ATTRS512
6948_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
6949{
6950  __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6951}
6952
6953static __inline__ __m256i __DEFAULT_FN_ATTRS512
6954_mm512_cvtsepi32_epi16 (__m512i __A)
6955{
6956  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6957               (__v16hi) _mm256_undefined_si256 (),
6958               (__mmask16) -1);
6959}
6960
6961static __inline__ __m256i __DEFAULT_FN_ATTRS512
6962_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6963{
6964  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6965               (__v16hi) __O, __M);
6966}
6967
6968static __inline__ __m256i __DEFAULT_FN_ATTRS512
6969_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
6970{
6971  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6972               (__v16hi) _mm256_setzero_si256 (),
6973               __M);
6974}
6975
6976static __inline__ void __DEFAULT_FN_ATTRS512
6977_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
6978{
6979  __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6980}
6981
6982static __inline__ __m128i __DEFAULT_FN_ATTRS512
6983_mm512_cvtsepi64_epi8 (__m512i __A)
6984{
6985  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6986               (__v16qi) _mm_undefined_si128 (),
6987               (__mmask8) -1);
6988}
6989
6990static __inline__ __m128i __DEFAULT_FN_ATTRS512
6991_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6992{
6993  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6994               (__v16qi) __O, __M);
6995}
6996
6997static __inline__ __m128i __DEFAULT_FN_ATTRS512
6998_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
6999{
7000  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7001               (__v16qi) _mm_setzero_si128 (),
7002               __M);
7003}
7004
7005static __inline__ void __DEFAULT_FN_ATTRS512
7006_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7007{
7008  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7009}
7010
7011static __inline__ __m256i __DEFAULT_FN_ATTRS512
7012_mm512_cvtsepi64_epi32 (__m512i __A)
7013{
7014  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7015               (__v8si) _mm256_undefined_si256 (),
7016               (__mmask8) -1);
7017}
7018
7019static __inline__ __m256i __DEFAULT_FN_ATTRS512
7020_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7021{
7022  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7023               (__v8si) __O, __M);
7024}
7025
7026static __inline__ __m256i __DEFAULT_FN_ATTRS512
7027_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
7028{
7029  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7030               (__v8si) _mm256_setzero_si256 (),
7031               __M);
7032}
7033
7034static __inline__ void __DEFAULT_FN_ATTRS512
7035_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
7036{
7037  __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7038}
7039
7040static __inline__ __m128i __DEFAULT_FN_ATTRS512
7041_mm512_cvtsepi64_epi16 (__m512i __A)
7042{
7043  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7044               (__v8hi) _mm_undefined_si128 (),
7045               (__mmask8) -1);
7046}
7047
7048static __inline__ __m128i __DEFAULT_FN_ATTRS512
7049_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7050{
7051  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7052               (__v8hi) __O, __M);
7053}
7054
7055static __inline__ __m128i __DEFAULT_FN_ATTRS512
7056_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
7057{
7058  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7059               (__v8hi) _mm_setzero_si128 (),
7060               __M);
7061}
7062
7063static __inline__ void __DEFAULT_FN_ATTRS512
7064_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
7065{
7066  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7067}
7068
7069static __inline__ __m128i __DEFAULT_FN_ATTRS512
7070_mm512_cvtusepi32_epi8 (__m512i __A)
7071{
7072  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7073                (__v16qi) _mm_undefined_si128 (),
7074                (__mmask16) -1);
7075}
7076
7077static __inline__ __m128i __DEFAULT_FN_ATTRS512
7078_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7079{
7080  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7081                (__v16qi) __O,
7082                __M);
7083}
7084
7085static __inline__ __m128i __DEFAULT_FN_ATTRS512
7086_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
7087{
7088  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7089                (__v16qi) _mm_setzero_si128 (),
7090                __M);
7091}
7092
7093static __inline__ void __DEFAULT_FN_ATTRS512
7094_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7095{
7096  __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7097}
7098
7099static __inline__ __m256i __DEFAULT_FN_ATTRS512
7100_mm512_cvtusepi32_epi16 (__m512i __A)
7101{
7102  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7103                (__v16hi) _mm256_undefined_si256 (),
7104                (__mmask16) -1);
7105}
7106
7107static __inline__ __m256i __DEFAULT_FN_ATTRS512
7108_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7109{
7110  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7111                (__v16hi) __O,
7112                __M);
7113}
7114
7115static __inline__ __m256i __DEFAULT_FN_ATTRS512
7116_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
7117{
7118  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7119                (__v16hi) _mm256_setzero_si256 (),
7120                __M);
7121}
7122
7123static __inline__ void __DEFAULT_FN_ATTRS512
7124_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7125{
7126  __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7127}
7128
7129static __inline__ __m128i __DEFAULT_FN_ATTRS512
7130_mm512_cvtusepi64_epi8 (__m512i __A)
7131{
7132  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7133                (__v16qi) _mm_undefined_si128 (),
7134                (__mmask8) -1);
7135}
7136
7137static __inline__ __m128i __DEFAULT_FN_ATTRS512
7138_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7139{
7140  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7141                (__v16qi) __O,
7142                __M);
7143}
7144
7145static __inline__ __m128i __DEFAULT_FN_ATTRS512
7146_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
7147{
7148  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7149                (__v16qi) _mm_setzero_si128 (),
7150                __M);
7151}
7152
7153static __inline__ void __DEFAULT_FN_ATTRS512
7154_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7155{
7156  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7157}
7158
7159static __inline__ __m256i __DEFAULT_FN_ATTRS512
7160_mm512_cvtusepi64_epi32 (__m512i __A)
7161{
7162  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7163                (__v8si) _mm256_undefined_si256 (),
7164                (__mmask8) -1);
7165}
7166
7167static __inline__ __m256i __DEFAULT_FN_ATTRS512
7168_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7169{
7170  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7171                (__v8si) __O, __M);
7172}
7173
7174static __inline__ __m256i __DEFAULT_FN_ATTRS512
7175_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
7176{
7177  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7178                (__v8si) _mm256_setzero_si256 (),
7179                __M);
7180}
7181
7182static __inline__ void __DEFAULT_FN_ATTRS512
7183_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7184{
7185  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7186}
7187
7188static __inline__ __m128i __DEFAULT_FN_ATTRS512
7189_mm512_cvtusepi64_epi16 (__m512i __A)
7190{
7191  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7192                (__v8hi) _mm_undefined_si128 (),
7193                (__mmask8) -1);
7194}
7195
7196static __inline__ __m128i __DEFAULT_FN_ATTRS512
7197_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7198{
7199  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7200                (__v8hi) __O, __M);
7201}
7202
7203static __inline__ __m128i __DEFAULT_FN_ATTRS512
7204_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
7205{
7206  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7207                (__v8hi) _mm_setzero_si128 (),
7208                __M);
7209}
7210
7211static __inline__ void __DEFAULT_FN_ATTRS512
7212_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7213{
7214  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7215}
7216
7217static __inline__ __m128i __DEFAULT_FN_ATTRS512
7218_mm512_cvtepi32_epi8 (__m512i __A)
7219{
7220  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7221              (__v16qi) _mm_undefined_si128 (),
7222              (__mmask16) -1);
7223}
7224
7225static __inline__ __m128i __DEFAULT_FN_ATTRS512
7226_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7227{
7228  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7229              (__v16qi) __O, __M);
7230}
7231
7232static __inline__ __m128i __DEFAULT_FN_ATTRS512
7233_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
7234{
7235  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7236              (__v16qi) _mm_setzero_si128 (),
7237              __M);
7238}
7239
7240static __inline__ void __DEFAULT_FN_ATTRS512
7241_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7242{
7243  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7244}
7245
7246static __inline__ __m256i __DEFAULT_FN_ATTRS512
7247_mm512_cvtepi32_epi16 (__m512i __A)
7248{
7249  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7250              (__v16hi) _mm256_undefined_si256 (),
7251              (__mmask16) -1);
7252}
7253
7254static __inline__ __m256i __DEFAULT_FN_ATTRS512
7255_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7256{
7257  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7258              (__v16hi) __O, __M);
7259}
7260
7261static __inline__ __m256i __DEFAULT_FN_ATTRS512
7262_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
7263{
7264  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7265              (__v16hi) _mm256_setzero_si256 (),
7266              __M);
7267}
7268
7269static __inline__ void __DEFAULT_FN_ATTRS512
7270_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
7271{
7272  __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7273}
7274
7275static __inline__ __m128i __DEFAULT_FN_ATTRS512
7276_mm512_cvtepi64_epi8 (__m512i __A)
7277{
7278  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7279              (__v16qi) _mm_undefined_si128 (),
7280              (__mmask8) -1);
7281}
7282
7283static __inline__ __m128i __DEFAULT_FN_ATTRS512
7284_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7285{
7286  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7287              (__v16qi) __O, __M);
7288}
7289
7290static __inline__ __m128i __DEFAULT_FN_ATTRS512
7291_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
7292{
7293  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7294              (__v16qi) _mm_setzero_si128 (),
7295              __M);
7296}
7297
7298static __inline__ void __DEFAULT_FN_ATTRS512
7299_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7300{
7301  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7302}
7303
7304static __inline__ __m256i __DEFAULT_FN_ATTRS512
7305_mm512_cvtepi64_epi32 (__m512i __A)
7306{
7307  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7308              (__v8si) _mm256_undefined_si256 (),
7309              (__mmask8) -1);
7310}
7311
7312static __inline__ __m256i __DEFAULT_FN_ATTRS512
7313_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7314{
7315  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7316              (__v8si) __O, __M);
7317}
7318
7319static __inline__ __m256i __DEFAULT_FN_ATTRS512
7320_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
7321{
7322  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7323              (__v8si) _mm256_setzero_si256 (),
7324              __M);
7325}
7326
7327static __inline__ void __DEFAULT_FN_ATTRS512
7328_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7329{
7330  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7331}
7332
7333static __inline__ __m128i __DEFAULT_FN_ATTRS512
7334_mm512_cvtepi64_epi16 (__m512i __A)
7335{
7336  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7337              (__v8hi) _mm_undefined_si128 (),
7338              (__mmask8) -1);
7339}
7340
7341static __inline__ __m128i __DEFAULT_FN_ATTRS512
7342_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7343{
7344  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7345              (__v8hi) __O, __M);
7346}
7347
7348static __inline__ __m128i __DEFAULT_FN_ATTRS512
7349_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
7350{
7351  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7352              (__v8hi) _mm_setzero_si128 (),
7353              __M);
7354}
7355
7356static __inline__ void __DEFAULT_FN_ATTRS512
7357_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7358{
7359  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7360}
7361
7362#define _mm512_extracti32x4_epi32(A, imm) \
7363  (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
7364                                            (__v4si)_mm_undefined_si128(), \
7365                                            (__mmask8)-1)
7366
7367#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
7368  (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
7369                                            (__v4si)(__m128i)(W), \
7370                                            (__mmask8)(U))
7371
7372#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
7373  (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
7374                                            (__v4si)_mm_setzero_si128(), \
7375                                            (__mmask8)(U))
7376
7377#define _mm512_extracti64x4_epi64(A, imm) \
7378  (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
7379                                            (__v4di)_mm256_undefined_si256(), \
7380                                            (__mmask8)-1)
7381
7382#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
7383  (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
7384                                            (__v4di)(__m256i)(W), \
7385                                            (__mmask8)(U))
7386
7387#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
7388  (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
7389                                            (__v4di)_mm256_setzero_si256(), \
7390                                            (__mmask8)(U))
7391
7392#define _mm512_insertf64x4(A, B, imm) \
7393  (__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
7394                                      (__v4df)(__m256d)(B), (int)(imm))
7395
7396#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
7397  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
7398                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
7399                                  (__v8df)(__m512d)(W))
7400
7401#define _mm512_maskz_insertf64x4(U, A, B, imm) \
7402  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
7403                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
7404                                  (__v8df)_mm512_setzero_pd())
7405
7406#define _mm512_inserti64x4(A, B, imm) \
7407  (__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
7408                                      (__v4di)(__m256i)(B), (int)(imm))
7409
7410#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
7411  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
7412                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
7413                                  (__v8di)(__m512i)(W))
7414
7415#define _mm512_maskz_inserti64x4(U, A, B, imm) \
7416  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
7417                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
7418                                  (__v8di)_mm512_setzero_si512())
7419
7420#define _mm512_insertf32x4(A, B, imm) \
7421  (__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
7422                                     (__v4sf)(__m128)(B), (int)(imm))
7423
7424#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
7425  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
7426                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
7427                                 (__v16sf)(__m512)(W))
7428
7429#define _mm512_maskz_insertf32x4(U, A, B, imm) \
7430  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
7431                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
7432                                 (__v16sf)_mm512_setzero_ps())
7433
7434#define _mm512_inserti32x4(A, B, imm) \
7435  (__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
7436                                      (__v4si)(__m128i)(B), (int)(imm))
7437
7438#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
7439  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
7440                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
7441                                 (__v16si)(__m512i)(W))
7442
7443#define _mm512_maskz_inserti32x4(U, A, B, imm) \
7444  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
7445                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
7446                                 (__v16si)_mm512_setzero_si512())
7447
7448#define _mm512_getmant_round_pd(A, B, C, R) \
7449  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7450                                            (int)(((C)<<2) | (B)), \
7451                                            (__v8df)_mm512_undefined_pd(), \
7452                                            (__mmask8)-1, (int)(R))
7453
7454#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
7455  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7456                                            (int)(((C)<<2) | (B)), \
7457                                            (__v8df)(__m512d)(W), \
7458                                            (__mmask8)(U), (int)(R))
7459
7460#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
7461  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7462                                            (int)(((C)<<2) | (B)), \
7463                                            (__v8df)_mm512_setzero_pd(), \
7464                                            (__mmask8)(U), (int)(R))
7465
7466#define _mm512_getmant_pd(A, B, C) \
7467  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7468                                            (int)(((C)<<2) | (B)), \
7469                                            (__v8df)_mm512_setzero_pd(), \
7470                                            (__mmask8)-1, \
7471                                            _MM_FROUND_CUR_DIRECTION)
7472
7473#define _mm512_mask_getmant_pd(W, U, A, B, C) \
7474  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7475                                            (int)(((C)<<2) | (B)), \
7476                                            (__v8df)(__m512d)(W), \
7477                                            (__mmask8)(U), \
7478                                            _MM_FROUND_CUR_DIRECTION)
7479
7480#define _mm512_maskz_getmant_pd(U, A, B, C) \
7481  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7482                                            (int)(((C)<<2) | (B)), \
7483                                            (__v8df)_mm512_setzero_pd(), \
7484                                            (__mmask8)(U), \
7485                                            _MM_FROUND_CUR_DIRECTION)
7486
7487#define _mm512_getmant_round_ps(A, B, C, R) \
7488  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7489                                           (int)(((C)<<2) | (B)), \
7490                                           (__v16sf)_mm512_undefined_ps(), \
7491                                           (__mmask16)-1, (int)(R))
7492
7493#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
7494  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7495                                           (int)(((C)<<2) | (B)), \
7496                                           (__v16sf)(__m512)(W), \
7497                                           (__mmask16)(U), (int)(R))
7498
7499#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
7500  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7501                                           (int)(((C)<<2) | (B)), \
7502                                           (__v16sf)_mm512_setzero_ps(), \
7503                                           (__mmask16)(U), (int)(R))
7504
7505#define _mm512_getmant_ps(A, B, C) \
7506  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7507                                           (int)(((C)<<2)|(B)), \
7508                                           (__v16sf)_mm512_undefined_ps(), \
7509                                           (__mmask16)-1, \
7510                                           _MM_FROUND_CUR_DIRECTION)
7511
7512#define _mm512_mask_getmant_ps(W, U, A, B, C) \
7513  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7514                                           (int)(((C)<<2)|(B)), \
7515                                           (__v16sf)(__m512)(W), \
7516                                           (__mmask16)(U), \
7517                                           _MM_FROUND_CUR_DIRECTION)
7518
7519#define _mm512_maskz_getmant_ps(U, A, B, C) \
7520  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7521                                           (int)(((C)<<2)|(B)), \
7522                                           (__v16sf)_mm512_setzero_ps(), \
7523                                           (__mmask16)(U), \
7524                                           _MM_FROUND_CUR_DIRECTION)
7525
7526#define _mm512_getexp_round_pd(A, R) \
7527  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
7528                                           (__v8df)_mm512_undefined_pd(), \
7529                                           (__mmask8)-1, (int)(R))
7530
7531#define _mm512_mask_getexp_round_pd(W, U, A, R) \
7532  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
7533                                           (__v8df)(__m512d)(W), \
7534                                           (__mmask8)(U), (int)(R))
7535
7536#define _mm512_maskz_getexp_round_pd(U, A, R) \
7537  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
7538                                           (__v8df)_mm512_setzero_pd(), \
7539                                           (__mmask8)(U), (int)(R))
7540
7541static __inline__ __m512d __DEFAULT_FN_ATTRS512
7542_mm512_getexp_pd (__m512d __A)
7543{
7544  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7545                (__v8df) _mm512_undefined_pd (),
7546                (__mmask8) -1,
7547                _MM_FROUND_CUR_DIRECTION);
7548}
7549
7550static __inline__ __m512d __DEFAULT_FN_ATTRS512
7551_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7552{
7553  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7554                (__v8df) __W,
7555                (__mmask8) __U,
7556                _MM_FROUND_CUR_DIRECTION);
7557}
7558
7559static __inline__ __m512d __DEFAULT_FN_ATTRS512
7560_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
7561{
7562  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7563                (__v8df) _mm512_setzero_pd (),
7564                (__mmask8) __U,
7565                _MM_FROUND_CUR_DIRECTION);
7566}
7567
7568#define _mm512_getexp_round_ps(A, R) \
7569  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
7570                                          (__v16sf)_mm512_undefined_ps(), \
7571                                          (__mmask16)-1, (int)(R))
7572
7573#define _mm512_mask_getexp_round_ps(W, U, A, R) \
7574  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
7575                                          (__v16sf)(__m512)(W), \
7576                                          (__mmask16)(U), (int)(R))
7577
7578#define _mm512_maskz_getexp_round_ps(U, A, R) \
7579  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
7580                                          (__v16sf)_mm512_setzero_ps(), \
7581                                          (__mmask16)(U), (int)(R))
7582
7583static __inline__ __m512 __DEFAULT_FN_ATTRS512
7584_mm512_getexp_ps (__m512 __A)
7585{
7586  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7587               (__v16sf) _mm512_undefined_ps (),
7588               (__mmask16) -1,
7589               _MM_FROUND_CUR_DIRECTION);
7590}
7591
7592static __inline__ __m512 __DEFAULT_FN_ATTRS512
7593_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
7594{
7595  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7596               (__v16sf) __W,
7597               (__mmask16) __U,
7598               _MM_FROUND_CUR_DIRECTION);
7599}
7600
7601static __inline__ __m512 __DEFAULT_FN_ATTRS512
7602_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
7603{
7604  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7605               (__v16sf) _mm512_setzero_ps (),
7606               (__mmask16) __U,
7607               _MM_FROUND_CUR_DIRECTION);
7608}
7609
7610#define _mm512_i64gather_ps(index, addr, scale) \
7611  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
7612                                       (void const *)(addr), \
7613                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
7614                                       (int)(scale))
7615
7616#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
7617  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
7618                                       (void const *)(addr), \
7619                                       (__v8di)(__m512i)(index), \
7620                                       (__mmask8)(mask), (int)(scale))
7621
7622#define _mm512_i64gather_epi32(index, addr, scale) \
7623  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
7624                                        (void const *)(addr), \
7625                                        (__v8di)(__m512i)(index), \
7626                                        (__mmask8)-1, (int)(scale))
7627
7628#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
7629  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
7630                                        (void const *)(addr), \
7631                                        (__v8di)(__m512i)(index), \
7632                                        (__mmask8)(mask), (int)(scale))
7633
7634#define _mm512_i64gather_pd(index, addr, scale) \
7635  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
7636                                       (void const *)(addr), \
7637                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
7638                                       (int)(scale))
7639
7640#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
7641  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
7642                                       (void const *)(addr), \
7643                                       (__v8di)(__m512i)(index), \
7644                                       (__mmask8)(mask), (int)(scale))
7645
7646#define _mm512_i64gather_epi64(index, addr, scale) \
7647  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
7648                                       (void const *)(addr), \
7649                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
7650                                       (int)(scale))
7651
7652#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
7653  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
7654                                       (void const *)(addr), \
7655                                       (__v8di)(__m512i)(index), \
7656                                       (__mmask8)(mask), (int)(scale))
7657
7658#define _mm512_i32gather_ps(index, addr, scale) \
7659  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
7660                                       (void const *)(addr), \
7661                                       (__v16si)(__m512)(index), \
7662                                       (__mmask16)-1, (int)(scale))
7663
7664#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
7665  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
7666                                       (void const *)(addr), \
7667                                       (__v16si)(__m512)(index), \
7668                                       (__mmask16)(mask), (int)(scale))
7669
7670#define _mm512_i32gather_epi32(index, addr, scale) \
7671  (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
7672                                        (void const *)(addr), \
7673                                        (__v16si)(__m512i)(index), \
7674                                        (__mmask16)-1, (int)(scale))
7675
7676#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
7677  (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
7678                                        (void const *)(addr), \
7679                                        (__v16si)(__m512i)(index), \
7680                                        (__mmask16)(mask), (int)(scale))
7681
7682#define _mm512_i32gather_pd(index, addr, scale) \
7683  (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
7684                                       (void const *)(addr), \
7685                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
7686                                       (int)(scale))
7687
7688#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
7689  (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
7690                                       (void const *)(addr), \
7691                                       (__v8si)(__m256i)(index), \
7692                                       (__mmask8)(mask), (int)(scale))
7693
7694#define _mm512_i32gather_epi64(index, addr, scale) \
7695  (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
7696                                       (void const *)(addr), \
7697                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
7698                                       (int)(scale))
7699
7700#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
7701  (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
7702                                       (void const *)(addr), \
7703                                       (__v8si)(__m256i)(index), \
7704                                       (__mmask8)(mask), (int)(scale))
7705
7706#define _mm512_i64scatter_ps(addr, index, v1, scale) \
7707  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
7708                                (__v8di)(__m512i)(index), \
7709                                (__v8sf)(__m256)(v1), (int)(scale))
7710
7711#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
7712  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
7713                                (__v8di)(__m512i)(index), \
7714                                (__v8sf)(__m256)(v1), (int)(scale))
7715
7716#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
7717  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
7718                                (__v8di)(__m512i)(index), \
7719                                (__v8si)(__m256i)(v1), (int)(scale))
7720
7721#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
7722  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
7723                                (__v8di)(__m512i)(index), \
7724                                (__v8si)(__m256i)(v1), (int)(scale))
7725
7726#define _mm512_i64scatter_pd(addr, index, v1, scale) \
7727  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
7728                               (__v8di)(__m512i)(index), \
7729                               (__v8df)(__m512d)(v1), (int)(scale))
7730
7731#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
7732  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
7733                               (__v8di)(__m512i)(index), \
7734                               (__v8df)(__m512d)(v1), (int)(scale))
7735
7736#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
7737  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
7738                               (__v8di)(__m512i)(index), \
7739                               (__v8di)(__m512i)(v1), (int)(scale))
7740
7741#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
7742  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
7743                               (__v8di)(__m512i)(index), \
7744                               (__v8di)(__m512i)(v1), (int)(scale))
7745
7746#define _mm512_i32scatter_ps(addr, index, v1, scale) \
7747  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
7748                                (__v16si)(__m512i)(index), \
7749                                (__v16sf)(__m512)(v1), (int)(scale))
7750
7751#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
7752  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
7753                                (__v16si)(__m512i)(index), \
7754                                (__v16sf)(__m512)(v1), (int)(scale))
7755
7756#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
7757  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
7758                                (__v16si)(__m512i)(index), \
7759                                (__v16si)(__m512i)(v1), (int)(scale))
7760
7761#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
7762  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
7763                                (__v16si)(__m512i)(index), \
7764                                (__v16si)(__m512i)(v1), (int)(scale))
7765
7766#define _mm512_i32scatter_pd(addr, index, v1, scale) \
7767  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
7768                               (__v8si)(__m256i)(index), \
7769                               (__v8df)(__m512d)(v1), (int)(scale))
7770
7771#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
7772  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
7773                               (__v8si)(__m256i)(index), \
7774                               (__v8df)(__m512d)(v1), (int)(scale))
7775
7776#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
7777  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
7778                               (__v8si)(__m256i)(index), \
7779                               (__v8di)(__m512i)(v1), (int)(scale))
7780
7781#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
7782  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
7783                               (__v8si)(__m256i)(index), \
7784                               (__v8di)(__m512i)(v1), (int)(scale))
7785
7786static __inline__ __m128 __DEFAULT_FN_ATTRS128
7787_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7788{
7789  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7790                                       (__v4sf)__A,
7791                                       (__v4sf)__B,
7792                                       (__mmask8)__U,
7793                                       _MM_FROUND_CUR_DIRECTION);
7794}
7795
7796#define _mm_fmadd_round_ss(A, B, C, R) \
7797  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7798                                        (__v4sf)(__m128)(B), \
7799                                        (__v4sf)(__m128)(C), (__mmask8)-1, \
7800                                        (int)(R))
7801
7802#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
7803  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7804                                        (__v4sf)(__m128)(A), \
7805                                        (__v4sf)(__m128)(B), (__mmask8)(U), \
7806                                        (int)(R))
7807
7808static __inline__ __m128 __DEFAULT_FN_ATTRS128
7809_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7810{
7811  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7812                                        (__v4sf)__B,
7813                                        (__v4sf)__C,
7814                                        (__mmask8)__U,
7815                                        _MM_FROUND_CUR_DIRECTION);
7816}
7817
7818#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
7819  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7820                                         (__v4sf)(__m128)(B), \
7821                                         (__v4sf)(__m128)(C), (__mmask8)(U), \
7822                                         (int)(R))
7823
7824static __inline__ __m128 __DEFAULT_FN_ATTRS128
7825_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7826{
7827  return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7828                                        (__v4sf)__X,
7829                                        (__v4sf)__Y,
7830                                        (__mmask8)__U,
7831                                        _MM_FROUND_CUR_DIRECTION);
7832}
7833
7834#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
7835  (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7836                                         (__v4sf)(__m128)(X), \
7837                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
7838                                         (int)(R))
7839
7840static __inline__ __m128 __DEFAULT_FN_ATTRS128
7841_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7842{
7843  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7844                                       (__v4sf)__A,
7845                                       -(__v4sf)__B,
7846                                       (__mmask8)__U,
7847                                       _MM_FROUND_CUR_DIRECTION);
7848}
7849
7850#define _mm_fmsub_round_ss(A, B, C, R) \
7851  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7852                                        (__v4sf)(__m128)(B), \
7853                                        -(__v4sf)(__m128)(C), (__mmask8)-1, \
7854                                        (int)(R))
7855
7856#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
7857  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7858                                        (__v4sf)(__m128)(A), \
7859                                        -(__v4sf)(__m128)(B), (__mmask8)(U), \
7860                                        (int)(R))
7861
7862static __inline__ __m128 __DEFAULT_FN_ATTRS128
7863_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7864{
7865  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7866                                        (__v4sf)__B,
7867                                        -(__v4sf)__C,
7868                                        (__mmask8)__U,
7869                                        _MM_FROUND_CUR_DIRECTION);
7870}
7871
7872#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
7873  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7874                                         (__v4sf)(__m128)(B), \
7875                                         -(__v4sf)(__m128)(C), (__mmask8)(U), \
7876                                         (int)(R))
7877
7878static __inline__ __m128 __DEFAULT_FN_ATTRS128
7879_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7880{
7881  return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7882                                        (__v4sf)__X,
7883                                        (__v4sf)__Y,
7884                                        (__mmask8)__U,
7885                                        _MM_FROUND_CUR_DIRECTION);
7886}
7887
7888#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
7889  (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7890                                         (__v4sf)(__m128)(X), \
7891                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
7892                                         (int)(R))
7893
7894static __inline__ __m128 __DEFAULT_FN_ATTRS128
7895_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7896{
7897  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7898                                       -(__v4sf)__A,
7899                                       (__v4sf)__B,
7900                                       (__mmask8)__U,
7901                                       _MM_FROUND_CUR_DIRECTION);
7902}
7903
7904#define _mm_fnmadd_round_ss(A, B, C, R) \
7905  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7906                                        -(__v4sf)(__m128)(B), \
7907                                        (__v4sf)(__m128)(C), (__mmask8)-1, \
7908                                        (int)(R))
7909
7910#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
7911  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7912                                        -(__v4sf)(__m128)(A), \
7913                                        (__v4sf)(__m128)(B), (__mmask8)(U), \
7914                                        (int)(R))
7915
7916static __inline__ __m128 __DEFAULT_FN_ATTRS128
7917_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7918{
7919  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7920                                        -(__v4sf)__B,
7921                                        (__v4sf)__C,
7922                                        (__mmask8)__U,
7923                                        _MM_FROUND_CUR_DIRECTION);
7924}
7925
7926#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
7927  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7928                                         -(__v4sf)(__m128)(B), \
7929                                         (__v4sf)(__m128)(C), (__mmask8)(U), \
7930                                         (int)(R))
7931
7932static __inline__ __m128 __DEFAULT_FN_ATTRS128
7933_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7934{
7935  return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7936                                        -(__v4sf)__X,
7937                                        (__v4sf)__Y,
7938                                        (__mmask8)__U,
7939                                        _MM_FROUND_CUR_DIRECTION);
7940}
7941
7942#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
7943  (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7944                                         -(__v4sf)(__m128)(X), \
7945                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
7946                                         (int)(R))
7947
7948static __inline__ __m128 __DEFAULT_FN_ATTRS128
7949_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7950{
7951  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7952                                       -(__v4sf)__A,
7953                                       -(__v4sf)__B,
7954                                       (__mmask8)__U,
7955                                       _MM_FROUND_CUR_DIRECTION);
7956}
7957
7958#define _mm_fnmsub_round_ss(A, B, C, R) \
7959  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7960                                        -(__v4sf)(__m128)(B), \
7961                                        -(__v4sf)(__m128)(C), (__mmask8)-1, \
7962                                        (int)(R))
7963
7964#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
7965  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7966                                        -(__v4sf)(__m128)(A), \
7967                                        -(__v4sf)(__m128)(B), (__mmask8)(U), \
7968                                        (int)(R))
7969
7970static __inline__ __m128 __DEFAULT_FN_ATTRS128
7971_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7972{
7973  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7974                                        -(__v4sf)__B,
7975                                        -(__v4sf)__C,
7976                                        (__mmask8)__U,
7977                                        _MM_FROUND_CUR_DIRECTION);
7978}
7979
7980#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
7981  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7982                                         -(__v4sf)(__m128)(B), \
7983                                         -(__v4sf)(__m128)(C), (__mmask8)(U), \
7984                                         (int)(R))
7985
7986static __inline__ __m128 __DEFAULT_FN_ATTRS128
7987_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7988{
7989  return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7990                                        -(__v4sf)__X,
7991                                        (__v4sf)__Y,
7992                                        (__mmask8)__U,
7993                                        _MM_FROUND_CUR_DIRECTION);
7994}
7995
7996#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
7997  (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7998                                         -(__v4sf)(__m128)(X), \
7999                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
8000                                         (int)(R))
8001
8002static __inline__ __m128d __DEFAULT_FN_ATTRS128
8003_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8004{
8005  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8006                                       (__v2df)__A,
8007                                       (__v2df)__B,
8008                                       (__mmask8)__U,
8009                                       _MM_FROUND_CUR_DIRECTION);
8010}
8011
8012#define _mm_fmadd_round_sd(A, B, C, R) \
8013  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8014                                         (__v2df)(__m128d)(B), \
8015                                         (__v2df)(__m128d)(C), (__mmask8)-1, \
8016                                         (int)(R))
8017
8018#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
8019  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8020                                         (__v2df)(__m128d)(A), \
8021                                         (__v2df)(__m128d)(B), (__mmask8)(U), \
8022                                         (int)(R))
8023
8024static __inline__ __m128d __DEFAULT_FN_ATTRS128
8025_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8026{
8027  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8028                                        (__v2df)__B,
8029                                        (__v2df)__C,
8030                                        (__mmask8)__U,
8031                                        _MM_FROUND_CUR_DIRECTION);
8032}
8033
8034#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
8035  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8036                                          (__v2df)(__m128d)(B), \
8037                                          (__v2df)(__m128d)(C), (__mmask8)(U), \
8038                                          (int)(R))
8039
8040static __inline__ __m128d __DEFAULT_FN_ATTRS128
8041_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8042{
8043  return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8044                                        (__v2df)__X,
8045                                        (__v2df)__Y,
8046                                        (__mmask8)__U,
8047                                        _MM_FROUND_CUR_DIRECTION);
8048}
8049
8050#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
8051  (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
8052                                          (__v2df)(__m128d)(X), \
8053                                          (__v2df)(__m128d)(Y), (__mmask8)(U), \
8054                                          (int)(R))
8055
8056static __inline__ __m128d __DEFAULT_FN_ATTRS128
8057_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8058{
8059  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8060                                       (__v2df)__A,
8061                                       -(__v2df)__B,
8062                                       (__mmask8)__U,
8063                                       _MM_FROUND_CUR_DIRECTION);
8064}
8065
8066#define _mm_fmsub_round_sd(A, B, C, R) \
8067  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8068                                         (__v2df)(__m128d)(B), \
8069                                         -(__v2df)(__m128d)(C), (__mmask8)-1, \
8070                                         (int)(R))
8071
8072#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
8073  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8074                                         (__v2df)(__m128d)(A), \
8075                                         -(__v2df)(__m128d)(B), (__mmask8)(U), \
8076                                         (int)(R))
8077
8078static __inline__ __m128d __DEFAULT_FN_ATTRS128
8079_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8080{
8081  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8082                                        (__v2df)__B,
8083                                        -(__v2df)__C,
8084                                        (__mmask8)__U,
8085                                        _MM_FROUND_CUR_DIRECTION);
8086}
8087
8088#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
8089  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8090                                          (__v2df)(__m128d)(B), \
8091                                          -(__v2df)(__m128d)(C), \
8092                                          (__mmask8)(U), (int)(R))
8093
8094static __inline__ __m128d __DEFAULT_FN_ATTRS128
8095_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8096{
8097  return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8098                                        (__v2df)__X,
8099                                        (__v2df)__Y,
8100                                        (__mmask8)__U,
8101                                        _MM_FROUND_CUR_DIRECTION);
8102}
8103
8104#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
8105  (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
8106                                          (__v2df)(__m128d)(X), \
8107                                          (__v2df)(__m128d)(Y), \
8108                                          (__mmask8)(U), (int)(R))
8109
8110static __inline__ __m128d __DEFAULT_FN_ATTRS128
8111_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8112{
8113  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8114                                       -(__v2df)__A,
8115                                       (__v2df)__B,
8116                                       (__mmask8)__U,
8117                                       _MM_FROUND_CUR_DIRECTION);
8118}
8119
8120#define _mm_fnmadd_round_sd(A, B, C, R) \
8121  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8122                                         -(__v2df)(__m128d)(B), \
8123                                         (__v2df)(__m128d)(C), (__mmask8)-1, \
8124                                         (int)(R))
8125
8126#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
8127  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8128                                         -(__v2df)(__m128d)(A), \
8129                                         (__v2df)(__m128d)(B), (__mmask8)(U), \
8130                                         (int)(R))
8131
8132static __inline__ __m128d __DEFAULT_FN_ATTRS128
8133_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8134{
8135  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8136                                        -(__v2df)__B,
8137                                        (__v2df)__C,
8138                                        (__mmask8)__U,
8139                                        _MM_FROUND_CUR_DIRECTION);
8140}
8141
8142#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
8143  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8144                                          -(__v2df)(__m128d)(B), \
8145                                          (__v2df)(__m128d)(C), (__mmask8)(U), \
8146                                          (int)(R))
8147
8148static __inline__ __m128d __DEFAULT_FN_ATTRS128
8149_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8150{
8151  return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8152                                        -(__v2df)__X,
8153                                        (__v2df)__Y,
8154                                        (__mmask8)__U,
8155                                        _MM_FROUND_CUR_DIRECTION);
8156}
8157
8158#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
8159  (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
8160                                          -(__v2df)(__m128d)(X), \
8161                                          (__v2df)(__m128d)(Y), (__mmask8)(U), \
8162                                          (int)(R))
8163
8164static __inline__ __m128d __DEFAULT_FN_ATTRS128
8165_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8166{
8167  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8168                                       -(__v2df)__A,
8169                                       -(__v2df)__B,
8170                                       (__mmask8)__U,
8171                                       _MM_FROUND_CUR_DIRECTION);
8172}
8173
8174#define _mm_fnmsub_round_sd(A, B, C, R) \
8175  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8176                                         -(__v2df)(__m128d)(B), \
8177                                         -(__v2df)(__m128d)(C), (__mmask8)-1, \
8178                                         (int)(R))
8179
8180#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
8181  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8182                                         -(__v2df)(__m128d)(A), \
8183                                         -(__v2df)(__m128d)(B), (__mmask8)(U), \
8184                                         (int)(R))
8185
8186static __inline__ __m128d __DEFAULT_FN_ATTRS128
8187_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8188{
8189  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8190                                        -(__v2df)__B,
8191                                        -(__v2df)__C,
8192                                        (__mmask8)__U,
8193                                        _MM_FROUND_CUR_DIRECTION);
8194}
8195
8196#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
8197  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8198                                          -(__v2df)(__m128d)(B), \
8199                                          -(__v2df)(__m128d)(C), \
8200                                          (__mmask8)(U), \
8201                                          (int)(R))
8202
8203static __inline__ __m128d __DEFAULT_FN_ATTRS128
8204_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8205{
8206  return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8207                                        -(__v2df)__X,
8208                                        (__v2df)__Y,
8209                                        (__mmask8)__U,
8210                                        _MM_FROUND_CUR_DIRECTION);
8211}
8212
8213#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
8214  (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
8215                                          -(__v2df)(__m128d)(X), \
8216                                          (__v2df)(__m128d)(Y), \
8217                                          (__mmask8)(U), (int)(R))
8218
8219#define _mm512_permutex_pd(X, C) \
8220  (__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C))
8221
8222#define _mm512_mask_permutex_pd(W, U, X, C) \
8223  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
8224                                       (__v8df)_mm512_permutex_pd((X), (C)), \
8225                                       (__v8df)(__m512d)(W))
8226
8227#define _mm512_maskz_permutex_pd(U, X, C) \
8228  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
8229                                       (__v8df)_mm512_permutex_pd((X), (C)), \
8230                                       (__v8df)_mm512_setzero_pd())
8231
8232#define _mm512_permutex_epi64(X, C) \
8233  (__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C))
8234
8235#define _mm512_mask_permutex_epi64(W, U, X, C) \
8236  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
8237                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
8238                                      (__v8di)(__m512i)(W))
8239
8240#define _mm512_maskz_permutex_epi64(U, X, C) \
8241  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
8242                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
8243                                      (__v8di)_mm512_setzero_si512())
8244
8245static __inline__ __m512d __DEFAULT_FN_ATTRS512
8246_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
8247{
8248  return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
8249}
8250
8251static __inline__ __m512d __DEFAULT_FN_ATTRS512
8252_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
8253{
8254  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8255                                        (__v8df)_mm512_permutexvar_pd(__X, __Y),
8256                                        (__v8df)__W);
8257}
8258
8259static __inline__ __m512d __DEFAULT_FN_ATTRS512
8260_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
8261{
8262  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8263                                        (__v8df)_mm512_permutexvar_pd(__X, __Y),
8264                                        (__v8df)_mm512_setzero_pd());
8265}
8266
8267static __inline__ __m512i __DEFAULT_FN_ATTRS512
8268_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
8269{
8270  return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
8271}
8272
8273static __inline__ __m512i __DEFAULT_FN_ATTRS512
8274_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
8275{
8276  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8277                                     (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8278                                     (__v8di)_mm512_setzero_si512());
8279}
8280
8281static __inline__ __m512i __DEFAULT_FN_ATTRS512
8282_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8283             __m512i __Y)
8284{
8285  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8286                                     (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8287                                     (__v8di)__W);
8288}
8289
8290static __inline__ __m512 __DEFAULT_FN_ATTRS512
8291_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8292{
8293  return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
8294}
8295
8296static __inline__ __m512 __DEFAULT_FN_ATTRS512
8297_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8298{
8299  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8300                                       (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8301                                       (__v16sf)__W);
8302}
8303
8304static __inline__ __m512 __DEFAULT_FN_ATTRS512
8305_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
8306{
8307  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8308                                       (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8309                                       (__v16sf)_mm512_setzero_ps());
8310}
8311
8312static __inline__ __m512i __DEFAULT_FN_ATTRS512
8313_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8314{
8315  return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
8316}
8317
8318#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8319
8320static __inline__ __m512i __DEFAULT_FN_ATTRS512
8321_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
8322{
8323  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8324                                    (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8325                                    (__v16si)_mm512_setzero_si512());
8326}
8327
8328static __inline__ __m512i __DEFAULT_FN_ATTRS512
8329_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8330             __m512i __Y)
8331{
8332  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8333                                    (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8334                                    (__v16si)__W);
8335}
8336
8337#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8338
8339static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8340_mm512_kand (__mmask16 __A, __mmask16 __B)
8341{
8342  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8343}
8344
8345static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8346_mm512_kandn (__mmask16 __A, __mmask16 __B)
8347{
8348  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8349}
8350
8351static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8352_mm512_kor (__mmask16 __A, __mmask16 __B)
8353{
8354  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8355}
8356
8357static __inline__ int __DEFAULT_FN_ATTRS
8358_mm512_kortestc (__mmask16 __A, __mmask16 __B)
8359{
8360  return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8361}
8362
8363static __inline__ int __DEFAULT_FN_ATTRS
8364_mm512_kortestz (__mmask16 __A, __mmask16 __B)
8365{
8366  return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8367}
8368
8369static __inline__ unsigned char __DEFAULT_FN_ATTRS
8370_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
8371{
8372  return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8373}
8374
8375static __inline__ unsigned char __DEFAULT_FN_ATTRS
8376_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
8377{
8378  return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8379}
8380
8381static __inline__ unsigned char __DEFAULT_FN_ATTRS
8382_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
8383  *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8384  return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8385}
8386
8387static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8388_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
8389{
8390  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8391}
8392
8393static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8394_mm512_kxnor (__mmask16 __A, __mmask16 __B)
8395{
8396  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8397}
8398
8399static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8400_mm512_kxor (__mmask16 __A, __mmask16 __B)
8401{
8402  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8403}
8404
8405#define _kand_mask16 _mm512_kand
8406#define _kandn_mask16 _mm512_kandn
8407#define _knot_mask16 _mm512_knot
8408#define _kor_mask16 _mm512_kor
8409#define _kxnor_mask16 _mm512_kxnor
8410#define _kxor_mask16 _mm512_kxor
8411
8412#define _kshiftli_mask16(A, I) \
8413  (__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I))
8414
8415#define _kshiftri_mask16(A, I) \
8416  (__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I))
8417
8418static __inline__ unsigned int __DEFAULT_FN_ATTRS
8419_cvtmask16_u32(__mmask16 __A) {
8420  return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
8421}
8422
8423static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8424_cvtu32_mask16(unsigned int __A) {
8425  return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
8426}
8427
8428static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8429_load_mask16(__mmask16 *__A) {
8430  return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
8431}
8432
8433static __inline__ void __DEFAULT_FN_ATTRS
8434_store_mask16(__mmask16 *__A, __mmask16 __B) {
8435  *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
8436}
8437
8438static __inline__ void __DEFAULT_FN_ATTRS512
8439_mm512_stream_si512 (void * __P, __m512i __A)
8440{
8441  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8442  __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8443}
8444
8445static __inline__ __m512i __DEFAULT_FN_ATTRS512
8446_mm512_stream_load_si512 (void const *__P)
8447{
8448  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8449  return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
8450}
8451
8452static __inline__ void __DEFAULT_FN_ATTRS512
8453_mm512_stream_pd (void *__P, __m512d __A)
8454{
8455  typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8456  __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8457}
8458
8459static __inline__ void __DEFAULT_FN_ATTRS512
8460_mm512_stream_ps (void *__P, __m512 __A)
8461{
8462  typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8463  __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8464}
8465
8466static __inline__ __m512d __DEFAULT_FN_ATTRS512
8467_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8468{
8469  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8470                  (__v8df) __W,
8471                  (__mmask8) __U);
8472}
8473
8474static __inline__ __m512d __DEFAULT_FN_ATTRS512
8475_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
8476{
8477  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8478                  (__v8df)
8479                  _mm512_setzero_pd (),
8480                  (__mmask8) __U);
8481}
8482
8483static __inline__ __m512i __DEFAULT_FN_ATTRS512
8484_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8485{
8486  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8487                  (__v8di) __W,
8488                  (__mmask8) __U);
8489}
8490
8491static __inline__ __m512i __DEFAULT_FN_ATTRS512
8492_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
8493{
8494  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8495                  (__v8di)
8496                  _mm512_setzero_si512 (),
8497                  (__mmask8) __U);
8498}
8499
8500static __inline__ __m512 __DEFAULT_FN_ATTRS512
8501_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8502{
8503  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8504                 (__v16sf) __W,
8505                 (__mmask16) __U);
8506}
8507
8508static __inline__ __m512 __DEFAULT_FN_ATTRS512
8509_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
8510{
8511  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8512                 (__v16sf)
8513                 _mm512_setzero_ps (),
8514                 (__mmask16) __U);
8515}
8516
8517static __inline__ __m512i __DEFAULT_FN_ATTRS512
8518_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8519{
8520  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8521                  (__v16si) __W,
8522                  (__mmask16) __U);
8523}
8524
8525static __inline__ __m512i __DEFAULT_FN_ATTRS512
8526_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
8527{
8528  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8529                  (__v16si)
8530                  _mm512_setzero_si512 (),
8531                  (__mmask16) __U);
8532}
8533
8534#define _mm_cmp_round_ss_mask(X, Y, P, R) \
8535  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8536                                      (__v4sf)(__m128)(Y), (int)(P), \
8537                                      (__mmask8)-1, (int)(R))
8538
8539#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
8540  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8541                                      (__v4sf)(__m128)(Y), (int)(P), \
8542                                      (__mmask8)(M), (int)(R))
8543
8544#define _mm_cmp_ss_mask(X, Y, P) \
8545  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8546                                      (__v4sf)(__m128)(Y), (int)(P), \
8547                                      (__mmask8)-1, \
8548                                      _MM_FROUND_CUR_DIRECTION)
8549
8550#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
8551  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8552                                      (__v4sf)(__m128)(Y), (int)(P), \
8553                                      (__mmask8)(M), \
8554                                      _MM_FROUND_CUR_DIRECTION)
8555
8556#define _mm_cmp_round_sd_mask(X, Y, P, R) \
8557  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
8558                                      (__v2df)(__m128d)(Y), (int)(P), \
8559                                      (__mmask8)-1, (int)(R))
8560
8561#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
8562  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
8563                                      (__v2df)(__m128d)(Y), (int)(P), \
8564                                      (__mmask8)(M), (int)(R))
8565
8566#define _mm_cmp_sd_mask(X, Y, P) \
8567  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
8568                                      (__v2df)(__m128d)(Y), (int)(P), \
8569                                      (__mmask8)-1, \
8570                                      _MM_FROUND_CUR_DIRECTION)
8571
8572#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
8573  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
8574                                      (__v2df)(__m128d)(Y), (int)(P), \
8575                                      (__mmask8)(M), \
8576                                      _MM_FROUND_CUR_DIRECTION)
8577
8578/* Bit Test */
8579
8580static __inline __mmask16 __DEFAULT_FN_ATTRS512
8581_mm512_test_epi32_mask (__m512i __A, __m512i __B)
8582{
8583  return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
8584                                   _mm512_setzero_si512());
8585}
8586
8587static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8588_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8589{
8590  return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8591                                        _mm512_setzero_si512());
8592}
8593
8594static __inline __mmask8 __DEFAULT_FN_ATTRS512
8595_mm512_test_epi64_mask (__m512i __A, __m512i __B)
8596{
8597  return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8598                                   _mm512_setzero_si512());
8599}
8600
8601static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8602_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8603{
8604  return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8605                                        _mm512_setzero_si512());
8606}
8607
8608static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8609_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8610{
8611  return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8612                                  _mm512_setzero_si512());
8613}
8614
8615static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8616_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8617{
8618  return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8619                                       _mm512_setzero_si512());
8620}
8621
8622static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8623_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8624{
8625  return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8626                                  _mm512_setzero_si512());
8627}
8628
8629static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8630_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8631{
8632  return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8633                                       _mm512_setzero_si512());
8634}
8635
8636static __inline__ __m512 __DEFAULT_FN_ATTRS512
8637_mm512_movehdup_ps (__m512 __A)
8638{
8639  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8640                         1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8641}
8642
8643static __inline__ __m512 __DEFAULT_FN_ATTRS512
8644_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8645{
8646  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8647                                             (__v16sf)_mm512_movehdup_ps(__A),
8648                                             (__v16sf)__W);
8649}
8650
8651static __inline__ __m512 __DEFAULT_FN_ATTRS512
8652_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
8653{
8654  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8655                                             (__v16sf)_mm512_movehdup_ps(__A),
8656                                             (__v16sf)_mm512_setzero_ps());
8657}
8658
8659static __inline__ __m512 __DEFAULT_FN_ATTRS512
8660_mm512_moveldup_ps (__m512 __A)
8661{
8662  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8663                         0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8664}
8665
8666static __inline__ __m512 __DEFAULT_FN_ATTRS512
8667_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8668{
8669  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8670                                             (__v16sf)_mm512_moveldup_ps(__A),
8671                                             (__v16sf)__W);
8672}
8673
8674static __inline__ __m512 __DEFAULT_FN_ATTRS512
8675_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
8676{
8677  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8678                                             (__v16sf)_mm512_moveldup_ps(__A),
8679                                             (__v16sf)_mm512_setzero_ps());
8680}
8681
8682static __inline__ __m128 __DEFAULT_FN_ATTRS128
8683_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8684{
8685  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
8686}
8687
8688static __inline__ __m128 __DEFAULT_FN_ATTRS128
8689_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
8690{
8691  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
8692                                     _mm_setzero_ps());
8693}
8694
8695static __inline__ __m128d __DEFAULT_FN_ATTRS128
8696_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8697{
8698  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
8699}
8700
8701static __inline__ __m128d __DEFAULT_FN_ATTRS128
8702_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
8703{
8704  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
8705                                     _mm_setzero_pd());
8706}
8707
8708static __inline__ void __DEFAULT_FN_ATTRS128
8709_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
8710{
8711  __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8712}
8713
8714static __inline__ void __DEFAULT_FN_ATTRS128
8715_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
8716{
8717  __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8718}
8719
8720static __inline__ __m128 __DEFAULT_FN_ATTRS128
8721_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
8722{
8723  __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8724                                                (__v4sf)_mm_setzero_ps(),
8725                                                0, 4, 4, 4);
8726
8727  return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1);
8728}
8729
8730static __inline__ __m128 __DEFAULT_FN_ATTRS128
8731_mm_maskz_load_ss (__mmask8 __U, const float* __A)
8732{
8733  return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A,
8734                                                (__v4sf) _mm_setzero_ps(),
8735                                                __U & 1);
8736}
8737
8738static __inline__ __m128d __DEFAULT_FN_ATTRS128
8739_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
8740{
8741  __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8742                                                 (__v2df)_mm_setzero_pd(),
8743                                                 0, 2);
8744
8745  return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1);
8746}
8747
8748static __inline__ __m128d __DEFAULT_FN_ATTRS128
8749_mm_maskz_load_sd (__mmask8 __U, const double* __A)
8750{
8751  return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A,
8752                                                  (__v2df) _mm_setzero_pd(),
8753                                                  __U & 1);
8754}
8755
8756#define _mm512_shuffle_epi32(A, I) \
8757  (__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I))
8758
8759#define _mm512_mask_shuffle_epi32(W, U, A, I) \
8760  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
8761                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
8762                                      (__v16si)(__m512i)(W))
8763
8764#define _mm512_maskz_shuffle_epi32(U, A, I) \
8765  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
8766                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
8767                                      (__v16si)_mm512_setzero_si512())
8768
8769static __inline__ __m512d __DEFAULT_FN_ATTRS512
8770_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
8771{
8772  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8773                (__v8df) __W,
8774                (__mmask8) __U);
8775}
8776
8777static __inline__ __m512d __DEFAULT_FN_ATTRS512
8778_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
8779{
8780  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8781                (__v8df) _mm512_setzero_pd (),
8782                (__mmask8) __U);
8783}
8784
8785static __inline__ __m512i __DEFAULT_FN_ATTRS512
8786_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8787{
8788  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8789                (__v8di) __W,
8790                (__mmask8) __U);
8791}
8792
8793static __inline__ __m512i __DEFAULT_FN_ATTRS512
8794_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
8795{
8796  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8797                (__v8di) _mm512_setzero_si512 (),
8798                (__mmask8) __U);
8799}
8800
8801static __inline__ __m512d __DEFAULT_FN_ATTRS512
8802_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
8803{
8804  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8805              (__v8df) __W,
8806              (__mmask8) __U);
8807}
8808
8809static __inline__ __m512d __DEFAULT_FN_ATTRS512
8810_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
8811{
8812  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8813              (__v8df) _mm512_setzero_pd(),
8814              (__mmask8) __U);
8815}
8816
8817static __inline__ __m512i __DEFAULT_FN_ATTRS512
8818_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
8819{
8820  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8821              (__v8di) __W,
8822              (__mmask8) __U);
8823}
8824
8825static __inline__ __m512i __DEFAULT_FN_ATTRS512
8826_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
8827{
8828  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8829              (__v8di) _mm512_setzero_si512(),
8830              (__mmask8) __U);
8831}
8832
8833static __inline__ __m512 __DEFAULT_FN_ATTRS512
8834_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
8835{
8836  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8837                   (__v16sf) __W,
8838                   (__mmask16) __U);
8839}
8840
8841static __inline__ __m512 __DEFAULT_FN_ATTRS512
8842_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
8843{
8844  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8845                   (__v16sf) _mm512_setzero_ps(),
8846                   (__mmask16) __U);
8847}
8848
8849static __inline__ __m512i __DEFAULT_FN_ATTRS512
8850_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
8851{
8852  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8853              (__v16si) __W,
8854              (__mmask16) __U);
8855}
8856
8857static __inline__ __m512i __DEFAULT_FN_ATTRS512
8858_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
8859{
8860  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8861              (__v16si) _mm512_setzero_si512(),
8862              (__mmask16) __U);
8863}
8864
8865static __inline__ __m512 __DEFAULT_FN_ATTRS512
8866_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
8867{
8868  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8869               (__v16sf) __W,
8870               (__mmask16) __U);
8871}
8872
8873static __inline__ __m512 __DEFAULT_FN_ATTRS512
8874_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
8875{
8876  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8877               (__v16sf) _mm512_setzero_ps(),
8878               (__mmask16) __U);
8879}
8880
8881static __inline__ __m512i __DEFAULT_FN_ATTRS512
8882_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8883{
8884  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8885                (__v16si) __W,
8886                (__mmask16) __U);
8887}
8888
8889static __inline__ __m512i __DEFAULT_FN_ATTRS512
8890_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
8891{
8892  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8893                (__v16si) _mm512_setzero_si512(),
8894                (__mmask16) __U);
8895}
8896
8897#define _mm512_cvt_roundps_pd(A, R) \
8898  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8899                                           (__v8df)_mm512_undefined_pd(), \
8900                                           (__mmask8)-1, (int)(R))
8901
8902#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
8903  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8904                                           (__v8df)(__m512d)(W), \
8905                                           (__mmask8)(U), (int)(R))
8906
8907#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
8908  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8909                                           (__v8df)_mm512_setzero_pd(), \
8910                                           (__mmask8)(U), (int)(R))
8911
8912static __inline__ __m512d __DEFAULT_FN_ATTRS512
8913_mm512_cvtps_pd (__m256 __A)
8914{
8915  return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8916}
8917
8918static __inline__ __m512d __DEFAULT_FN_ATTRS512
8919_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
8920{
8921  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8922                                              (__v8df)_mm512_cvtps_pd(__A),
8923                                              (__v8df)__W);
8924}
8925
8926static __inline__ __m512d __DEFAULT_FN_ATTRS512
8927_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
8928{
8929  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8930                                              (__v8df)_mm512_cvtps_pd(__A),
8931                                              (__v8df)_mm512_setzero_pd());
8932}
8933
8934static __inline__ __m512d __DEFAULT_FN_ATTRS512
8935_mm512_cvtpslo_pd (__m512 __A)
8936{
8937  return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
8938}
8939
8940static __inline__ __m512d __DEFAULT_FN_ATTRS512
8941_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
8942{
8943  return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
8944}
8945
8946static __inline__ __m512d __DEFAULT_FN_ATTRS512
8947_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
8948{
8949  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8950              (__v8df) __A,
8951              (__v8df) __W);
8952}
8953
8954static __inline__ __m512d __DEFAULT_FN_ATTRS512
8955_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
8956{
8957  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8958              (__v8df) __A,
8959              (__v8df) _mm512_setzero_pd ());
8960}
8961
8962static __inline__ __m512 __DEFAULT_FN_ATTRS512
8963_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
8964{
8965  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8966             (__v16sf) __A,
8967             (__v16sf) __W);
8968}
8969
8970static __inline__ __m512 __DEFAULT_FN_ATTRS512
8971_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
8972{
8973  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8974             (__v16sf) __A,
8975             (__v16sf) _mm512_setzero_ps ());
8976}
8977
8978static __inline__ void __DEFAULT_FN_ATTRS512
8979_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
8980{
8981  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8982            (__mmask8) __U);
8983}
8984
8985static __inline__ void __DEFAULT_FN_ATTRS512
8986_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
8987{
8988  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
8989            (__mmask8) __U);
8990}
8991
8992static __inline__ void __DEFAULT_FN_ATTRS512
8993_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
8994{
8995  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
8996            (__mmask16) __U);
8997}
8998
8999static __inline__ void __DEFAULT_FN_ATTRS512
9000_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9001{
9002  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9003            (__mmask16) __U);
9004}
9005
9006#define _mm_cvt_roundsd_ss(A, B, R) \
9007  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
9008                                             (__v2df)(__m128d)(B), \
9009                                             (__v4sf)_mm_undefined_ps(), \
9010                                             (__mmask8)-1, (int)(R))
9011
9012#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
9013  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
9014                                             (__v2df)(__m128d)(B), \
9015                                             (__v4sf)(__m128)(W), \
9016                                             (__mmask8)(U), (int)(R))
9017
9018#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
9019  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
9020                                             (__v2df)(__m128d)(B), \
9021                                             (__v4sf)_mm_setzero_ps(), \
9022                                             (__mmask8)(U), (int)(R))
9023
9024static __inline__ __m128 __DEFAULT_FN_ATTRS128
9025_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
9026{
9027  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9028                                             (__v2df)__B,
9029                                             (__v4sf)__W,
9030                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
9031}
9032
9033static __inline__ __m128 __DEFAULT_FN_ATTRS128
9034_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
9035{
9036  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9037                                             (__v2df)__B,
9038                                             (__v4sf)_mm_setzero_ps(),
9039                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
9040}
9041
9042#define _mm_cvtss_i32 _mm_cvtss_si32
9043#define _mm_cvtsd_i32 _mm_cvtsd_si32
9044#define _mm_cvti32_sd _mm_cvtsi32_sd
9045#define _mm_cvti32_ss _mm_cvtsi32_ss
9046#ifdef __x86_64__
9047#define _mm_cvtss_i64 _mm_cvtss_si64
9048#define _mm_cvtsd_i64 _mm_cvtsd_si64
9049#define _mm_cvti64_sd _mm_cvtsi64_sd
9050#define _mm_cvti64_ss _mm_cvtsi64_ss
9051#endif
9052
9053#ifdef __x86_64__
9054#define _mm_cvt_roundi64_sd(A, B, R) \
9055  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
9056                                     (int)(R))
9057
9058#define _mm_cvt_roundsi64_sd(A, B, R) \
9059  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
9060                                     (int)(R))
9061#endif
9062
9063#define _mm_cvt_roundsi32_ss(A, B, R) \
9064  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))
9065
9066#define _mm_cvt_roundi32_ss(A, B, R) \
9067  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))
9068
9069#ifdef __x86_64__
9070#define _mm_cvt_roundsi64_ss(A, B, R) \
9071  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
9072                                    (int)(R))
9073
9074#define _mm_cvt_roundi64_ss(A, B, R) \
9075  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
9076                                    (int)(R))
9077#endif
9078
9079#define _mm_cvt_roundss_sd(A, B, R) \
9080  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
9081                                              (__v4sf)(__m128)(B), \
9082                                              (__v2df)_mm_undefined_pd(), \
9083                                              (__mmask8)-1, (int)(R))
9084
9085#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
9086  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
9087                                              (__v4sf)(__m128)(B), \
9088                                              (__v2df)(__m128d)(W), \
9089                                              (__mmask8)(U), (int)(R))
9090
9091#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
9092  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
9093                                              (__v4sf)(__m128)(B), \
9094                                              (__v2df)_mm_setzero_pd(), \
9095                                              (__mmask8)(U), (int)(R))
9096
9097static __inline__ __m128d __DEFAULT_FN_ATTRS128
9098_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
9099{
9100  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9101                                            (__v4sf)__B,
9102                                            (__v2df)__W,
9103                                            (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
9104}
9105
9106static __inline__ __m128d __DEFAULT_FN_ATTRS128
9107_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
9108{
9109  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9110                                            (__v4sf)__B,
9111                                            (__v2df)_mm_setzero_pd(),
9112                                            (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
9113}
9114
9115static __inline__ __m128d __DEFAULT_FN_ATTRS128
9116_mm_cvtu32_sd (__m128d __A, unsigned __B)
9117{
9118  __A[0] = __B;
9119  return __A;
9120}
9121
9122#ifdef __x86_64__
9123#define _mm_cvt_roundu64_sd(A, B, R) \
9124  (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
9125                                      (unsigned long long)(B), (int)(R))
9126
9127static __inline__ __m128d __DEFAULT_FN_ATTRS128
9128_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
9129{
9130  __A[0] = __B;
9131  return __A;
9132}
9133#endif
9134
9135#define _mm_cvt_roundu32_ss(A, B, R) \
9136  (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
9137                                     (int)(R))
9138
9139static __inline__ __m128 __DEFAULT_FN_ATTRS128
9140_mm_cvtu32_ss (__m128 __A, unsigned __B)
9141{
9142  __A[0] = __B;
9143  return __A;
9144}
9145
9146#ifdef __x86_64__
9147#define _mm_cvt_roundu64_ss(A, B, R) \
9148  (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
9149                                     (unsigned long long)(B), (int)(R))
9150
9151static __inline__ __m128 __DEFAULT_FN_ATTRS128
9152_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
9153{
9154  __A[0] = __B;
9155  return __A;
9156}
9157#endif
9158
9159static __inline__ __m512i __DEFAULT_FN_ATTRS512
9160_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
9161{
9162  return (__m512i) __builtin_ia32_selectd_512(__M,
9163                                              (__v16si) _mm512_set1_epi32(__A),
9164                                              (__v16si) __O);
9165}
9166
9167static __inline__ __m512i __DEFAULT_FN_ATTRS512
9168_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
9169{
9170  return (__m512i) __builtin_ia32_selectq_512(__M,
9171                                              (__v8di) _mm512_set1_epi64(__A),
9172                                              (__v8di) __O);
9173}
9174
9175static  __inline __m512i __DEFAULT_FN_ATTRS512
9176_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
9177    char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
9178    char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
9179    char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
9180    char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
9181    char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
9182    char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
9183    char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
9184    char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
9185    char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
9186    char __e4, char __e3, char __e2, char __e1, char __e0) {
9187
9188  return __extension__ (__m512i)(__v64qi)
9189    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9190     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9191     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9192     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9193     __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9194     __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9195     __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9196     __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9197}
9198
9199static  __inline __m512i __DEFAULT_FN_ATTRS512
9200_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
9201    short __e27, short __e26, short __e25, short __e24, short __e23,
9202    short __e22, short __e21, short __e20, short __e19, short __e18,
9203    short __e17, short __e16, short __e15, short __e14, short __e13,
9204    short __e12, short __e11, short __e10, short __e9, short __e8,
9205    short __e7, short __e6, short __e5, short __e4, short __e3,
9206    short __e2, short __e1, short __e0) {
9207  return __extension__ (__m512i)(__v32hi)
9208    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9209     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9210     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9211     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9212}
9213
9214static __inline __m512i __DEFAULT_FN_ATTRS512
9215_mm512_set_epi32 (int __A, int __B, int __C, int __D,
9216     int __E, int __F, int __G, int __H,
9217     int __I, int __J, int __K, int __L,
9218     int __M, int __N, int __O, int __P)
9219{
9220  return __extension__ (__m512i)(__v16si)
9221  { __P, __O, __N, __M, __L, __K, __J, __I,
9222    __H, __G, __F, __E, __D, __C, __B, __A };
9223}
9224
9225#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,           \
9226       e8,e9,e10,e11,e12,e13,e14,e15)          \
9227  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
9228                   (e5),(e4),(e3),(e2),(e1),(e0))
9229
9230static __inline__ __m512i __DEFAULT_FN_ATTRS512
9231_mm512_set_epi64 (long long __A, long long __B, long long __C,
9232     long long __D, long long __E, long long __F,
9233     long long __G, long long __H)
9234{
9235  return __extension__ (__m512i) (__v8di)
9236  { __H, __G, __F, __E, __D, __C, __B, __A };
9237}
9238
9239#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)           \
9240  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9241
9242static __inline__ __m512d __DEFAULT_FN_ATTRS512
9243_mm512_set_pd (double __A, double __B, double __C, double __D,
9244        double __E, double __F, double __G, double __H)
9245{
9246  return __extension__ (__m512d)
9247  { __H, __G, __F, __E, __D, __C, __B, __A };
9248}
9249
9250#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)              \
9251  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9252
9253static __inline__ __m512 __DEFAULT_FN_ATTRS512
9254_mm512_set_ps (float __A, float __B, float __C, float __D,
9255        float __E, float __F, float __G, float __H,
9256        float __I, float __J, float __K, float __L,
9257        float __M, float __N, float __O, float __P)
9258{
9259  return __extension__ (__m512)
9260  { __P, __O, __N, __M, __L, __K, __J, __I,
9261    __H, __G, __F, __E, __D, __C, __B, __A };
9262}
9263
9264#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
9265  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
9266                (e4),(e3),(e2),(e1),(e0))
9267
9268static __inline__ __m512 __DEFAULT_FN_ATTRS512
9269_mm512_abs_ps(__m512 __A)
9270{
9271  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9272}
9273
9274static __inline__ __m512 __DEFAULT_FN_ATTRS512
9275_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
9276{
9277  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9278}
9279
9280static __inline__ __m512d __DEFAULT_FN_ATTRS512
9281_mm512_abs_pd(__m512d __A)
9282{
9283  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
9284}
9285
9286static __inline__ __m512d __DEFAULT_FN_ATTRS512
9287_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
9288{
9289  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
9290}
9291
9292/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
9293 * outputs. This class of vector operation forms the basis of many scientific
9294 * computations. In vector-reduction arithmetic, the evaluation off is
9295 * independent of the order of the input elements of V.
9296
9297 * Used bisection method. At each step, we partition the vector with previous
9298 * step in half, and the operation is performed on its two halves.
9299 * This takes log2(n) steps where n is the number of elements in the vector.
9300 */
9301
9302#define _mm512_mask_reduce_operator(op) \
9303  __v4du __t1 = (__v4du)_mm512_extracti64x4_epi64(__W, 0); \
9304  __v4du __t2 = (__v4du)_mm512_extracti64x4_epi64(__W, 1); \
9305  __m256i __t3 = (__m256i)(__t1 op __t2); \
9306  __v2du __t4 = (__v2du)_mm256_extracti128_si256(__t3, 0); \
9307  __v2du __t5 = (__v2du)_mm256_extracti128_si256(__t3, 1); \
9308  __v2du __t6 = __t4 op __t5; \
9309  __v2du __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
9310  __v2du __t8 = __t6 op __t7; \
9311  return __t8[0]
9312
9313static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
9314  _mm512_mask_reduce_operator(+);
9315}
9316
9317static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) {
9318  _mm512_mask_reduce_operator(*);
9319}
9320
9321static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) {
9322  _mm512_mask_reduce_operator(&);
9323}
9324
9325static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) {
9326  _mm512_mask_reduce_operator(|);
9327}
9328
9329static __inline__ long long __DEFAULT_FN_ATTRS512
9330_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
9331  __W = _mm512_maskz_mov_epi64(__M, __W);
9332  _mm512_mask_reduce_operator(+);
9333}
9334
9335static __inline__ long long __DEFAULT_FN_ATTRS512
9336_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
9337  __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
9338  _mm512_mask_reduce_operator(*);
9339}
9340
9341static __inline__ long long __DEFAULT_FN_ATTRS512
9342_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
9343  __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __W);
9344  _mm512_mask_reduce_operator(&);
9345}
9346
9347static __inline__ long long __DEFAULT_FN_ATTRS512
9348_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
9349  __W = _mm512_maskz_mov_epi64(__M, __W);
9350  _mm512_mask_reduce_operator(|);
9351}
9352#undef _mm512_mask_reduce_operator
9353
9354#define _mm512_mask_reduce_operator(op) \
9355  __m256d __t1 = _mm512_extractf64x4_pd(__W, 0); \
9356  __m256d __t2 = _mm512_extractf64x4_pd(__W, 1); \
9357  __m256d __t3 = __t1 op __t2; \
9358  __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
9359  __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
9360  __m128d __t6 = __t4 op __t5; \
9361  __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
9362  __m128d __t8 = __t6 op __t7; \
9363  return __t8[0]
9364
9365static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
9366  _mm512_mask_reduce_operator(+);
9367}
9368
9369static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
9370  _mm512_mask_reduce_operator(*);
9371}
9372
9373static __inline__ double __DEFAULT_FN_ATTRS512
9374_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
9375  __W = _mm512_maskz_mov_pd(__M, __W);
9376  _mm512_mask_reduce_operator(+);
9377}
9378
9379static __inline__ double __DEFAULT_FN_ATTRS512
9380_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
9381  __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
9382  _mm512_mask_reduce_operator(*);
9383}
9384#undef _mm512_mask_reduce_operator
9385
9386#define _mm512_mask_reduce_operator(op) \
9387  __v8su __t1 = (__v8su)_mm512_extracti64x4_epi64(__W, 0); \
9388  __v8su __t2 = (__v8su)_mm512_extracti64x4_epi64(__W, 1); \
9389  __m256i __t3 = (__m256i)(__t1 op __t2); \
9390  __v4su __t4 = (__v4su)_mm256_extracti128_si256(__t3, 0); \
9391  __v4su __t5 = (__v4su)_mm256_extracti128_si256(__t3, 1); \
9392  __v4su __t6 = __t4 op __t5; \
9393  __v4su __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
9394  __v4su __t8 = __t6 op __t7; \
9395  __v4su __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
9396  __v4su __t10 = __t8 op __t9; \
9397  return __t10[0]
9398
9399static __inline__ int __DEFAULT_FN_ATTRS512
9400_mm512_reduce_add_epi32(__m512i __W) {
9401  _mm512_mask_reduce_operator(+);
9402}
9403
9404static __inline__ int __DEFAULT_FN_ATTRS512
9405_mm512_reduce_mul_epi32(__m512i __W) {
9406  _mm512_mask_reduce_operator(*);
9407}
9408
9409static __inline__ int __DEFAULT_FN_ATTRS512
9410_mm512_reduce_and_epi32(__m512i __W) {
9411  _mm512_mask_reduce_operator(&);
9412}
9413
9414static __inline__ int __DEFAULT_FN_ATTRS512
9415_mm512_reduce_or_epi32(__m512i __W) {
9416  _mm512_mask_reduce_operator(|);
9417}
9418
9419static __inline__ int __DEFAULT_FN_ATTRS512
9420_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
9421  __W = _mm512_maskz_mov_epi32(__M, __W);
9422  _mm512_mask_reduce_operator(+);
9423}
9424
9425static __inline__ int __DEFAULT_FN_ATTRS512
9426_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
9427  __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
9428  _mm512_mask_reduce_operator(*);
9429}
9430
9431static __inline__ int __DEFAULT_FN_ATTRS512
9432_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
9433  __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __W);
9434  _mm512_mask_reduce_operator(&);
9435}
9436
9437static __inline__ int __DEFAULT_FN_ATTRS512
9438_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
9439  __W = _mm512_maskz_mov_epi32(__M, __W);
9440  _mm512_mask_reduce_operator(|);
9441}
9442#undef _mm512_mask_reduce_operator
9443
9444#define _mm512_mask_reduce_operator(op) \
9445  __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 0); \
9446  __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 1); \
9447  __m256 __t3 = __t1 op __t2; \
9448  __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \
9449  __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \
9450  __m128 __t6 = __t4 op __t5; \
9451  __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
9452  __m128 __t8 = __t6 op __t7; \
9453  __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
9454  __m128 __t10 = __t8 op __t9; \
9455  return __t10[0]
9456
9457static __inline__ float __DEFAULT_FN_ATTRS512
9458_mm512_reduce_add_ps(__m512 __W) {
9459  _mm512_mask_reduce_operator(+);
9460}
9461
9462static __inline__ float __DEFAULT_FN_ATTRS512
9463_mm512_reduce_mul_ps(__m512 __W) {
9464  _mm512_mask_reduce_operator(*);
9465}
9466
9467static __inline__ float __DEFAULT_FN_ATTRS512
9468_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
9469  __W = _mm512_maskz_mov_ps(__M, __W);
9470  _mm512_mask_reduce_operator(+);
9471}
9472
9473static __inline__ float __DEFAULT_FN_ATTRS512
9474_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
9475  __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
9476  _mm512_mask_reduce_operator(*);
9477}
9478#undef _mm512_mask_reduce_operator
9479
9480#define _mm512_mask_reduce_operator(op) \
9481  __m512i __t1 = (__m512i)__builtin_shufflevector((__v8di)__V, (__v8di)__V, 4, 5, 6, 7, 0, 1, 2, 3); \
9482  __m512i __t2 = _mm512_##op(__V, __t1); \
9483  __m512i __t3 = (__m512i)__builtin_shufflevector((__v8di)__t2, (__v8di)__t2, 2, 3, 0, 1, 6, 7, 4, 5); \
9484  __m512i __t4 = _mm512_##op(__t2, __t3); \
9485  __m512i __t5 = (__m512i)__builtin_shufflevector((__v8di)__t4, (__v8di)__t4, 1, 0, 3, 2, 5, 4, 7, 6); \
9486  __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \
9487  return __t6[0]
9488
9489static __inline__ long long __DEFAULT_FN_ATTRS512
9490_mm512_reduce_max_epi64(__m512i __V) {
9491  _mm512_mask_reduce_operator(max_epi64);
9492}
9493
9494static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9495_mm512_reduce_max_epu64(__m512i __V) {
9496  _mm512_mask_reduce_operator(max_epu64);
9497}
9498
9499static __inline__ long long __DEFAULT_FN_ATTRS512
9500_mm512_reduce_min_epi64(__m512i __V) {
9501  _mm512_mask_reduce_operator(min_epi64);
9502}
9503
9504static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9505_mm512_reduce_min_epu64(__m512i __V) {
9506  _mm512_mask_reduce_operator(min_epu64);
9507}
9508
9509static __inline__ long long __DEFAULT_FN_ATTRS512
9510_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
9511  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
9512  _mm512_mask_reduce_operator(max_epi64);
9513}
9514
9515static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9516_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
9517  __V = _mm512_maskz_mov_epi64(__M, __V);
9518  _mm512_mask_reduce_operator(max_epu64);
9519}
9520
9521static __inline__ long long __DEFAULT_FN_ATTRS512
9522_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
9523  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
9524  _mm512_mask_reduce_operator(min_epi64);
9525}
9526
9527static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9528_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
9529  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __V);
9530  _mm512_mask_reduce_operator(min_epu64);
9531}
9532#undef _mm512_mask_reduce_operator
9533
9534#define _mm512_mask_reduce_operator(op) \
9535  __m256i __t1 = _mm512_extracti64x4_epi64(__V, 0); \
9536  __m256i __t2 = _mm512_extracti64x4_epi64(__V, 1); \
9537  __m256i __t3 = _mm256_##op(__t1, __t2); \
9538  __m128i __t4 = _mm256_extracti128_si256(__t3, 0); \
9539  __m128i __t5 = _mm256_extracti128_si256(__t3, 1); \
9540  __m128i __t6 = _mm_##op(__t4, __t5); \
9541  __m128i __t7 = (__m128i)__builtin_shufflevector((__v4si)__t6, (__v4si)__t6, 2, 3, 0, 1); \
9542  __m128i __t8 = _mm_##op(__t6, __t7); \
9543  __m128i __t9 = (__m128i)__builtin_shufflevector((__v4si)__t8, (__v4si)__t8, 1, 0, 3, 2); \
9544  __v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \
9545  return __t10[0]
9546
9547static __inline__ int __DEFAULT_FN_ATTRS512
9548_mm512_reduce_max_epi32(__m512i __V) {
9549  _mm512_mask_reduce_operator(max_epi32);
9550}
9551
9552static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9553_mm512_reduce_max_epu32(__m512i __V) {
9554  _mm512_mask_reduce_operator(max_epu32);
9555}
9556
9557static __inline__ int __DEFAULT_FN_ATTRS512
9558_mm512_reduce_min_epi32(__m512i __V) {
9559  _mm512_mask_reduce_operator(min_epi32);
9560}
9561
9562static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9563_mm512_reduce_min_epu32(__m512i __V) {
9564  _mm512_mask_reduce_operator(min_epu32);
9565}
9566
9567static __inline__ int __DEFAULT_FN_ATTRS512
9568_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
9569  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
9570  _mm512_mask_reduce_operator(max_epi32);
9571}
9572
9573static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9574_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
9575  __V = _mm512_maskz_mov_epi32(__M, __V);
9576  _mm512_mask_reduce_operator(max_epu32);
9577}
9578
9579static __inline__ int __DEFAULT_FN_ATTRS512
9580_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
9581  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
9582  _mm512_mask_reduce_operator(min_epi32);
9583}
9584
9585static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9586_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
9587  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __V);
9588  _mm512_mask_reduce_operator(min_epu32);
9589}
9590#undef _mm512_mask_reduce_operator
9591
9592#define _mm512_mask_reduce_operator(op) \
9593  __m256d __t1 = _mm512_extractf64x4_pd(__V, 0); \
9594  __m256d __t2 = _mm512_extractf64x4_pd(__V, 1); \
9595  __m256d __t3 = _mm256_##op(__t1, __t2); \
9596  __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
9597  __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
9598  __m128d __t6 = _mm_##op(__t4, __t5); \
9599  __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
9600  __m128d __t8 = _mm_##op(__t6, __t7); \
9601  return __t8[0]
9602
9603static __inline__ double __DEFAULT_FN_ATTRS512
9604_mm512_reduce_max_pd(__m512d __V) {
9605  _mm512_mask_reduce_operator(max_pd);
9606}
9607
9608static __inline__ double __DEFAULT_FN_ATTRS512
9609_mm512_reduce_min_pd(__m512d __V) {
9610  _mm512_mask_reduce_operator(min_pd);
9611}
9612
9613static __inline__ double __DEFAULT_FN_ATTRS512
9614_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
9615  __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
9616  _mm512_mask_reduce_operator(max_pd);
9617}
9618
9619static __inline__ double __DEFAULT_FN_ATTRS512
9620_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
9621  __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
9622  _mm512_mask_reduce_operator(min_pd);
9623}
9624#undef _mm512_mask_reduce_operator
9625
9626#define _mm512_mask_reduce_operator(op) \
9627  __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 0); \
9628  __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 1); \
9629  __m256 __t3 = _mm256_##op(__t1, __t2); \
9630  __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \
9631  __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \
9632  __m128 __t6 = _mm_##op(__t4, __t5); \
9633  __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
9634  __m128 __t8 = _mm_##op(__t6, __t7); \
9635  __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
9636  __m128 __t10 = _mm_##op(__t8, __t9); \
9637  return __t10[0]
9638
9639static __inline__ float __DEFAULT_FN_ATTRS512
9640_mm512_reduce_max_ps(__m512 __V) {
9641  _mm512_mask_reduce_operator(max_ps);
9642}
9643
9644static __inline__ float __DEFAULT_FN_ATTRS512
9645_mm512_reduce_min_ps(__m512 __V) {
9646  _mm512_mask_reduce_operator(min_ps);
9647}
9648
9649static __inline__ float __DEFAULT_FN_ATTRS512
9650_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
9651  __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
9652  _mm512_mask_reduce_operator(max_ps);
9653}
9654
9655static __inline__ float __DEFAULT_FN_ATTRS512
9656_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
9657  __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
9658  _mm512_mask_reduce_operator(min_ps);
9659}
9660#undef _mm512_mask_reduce_operator
9661
9662/// Moves the least significant 32 bits of a vector of [16 x i32] to a
9663///    32-bit signed integer value.
9664///
9665/// \headerfile <x86intrin.h>
9666///
9667/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
9668///
9669/// \param __A
9670///    A vector of [16 x i32]. The least significant 32 bits are moved to the
9671///    destination.
9672/// \returns A 32-bit signed integer containing the moved value.
9673static __inline__ int __DEFAULT_FN_ATTRS512
9674_mm512_cvtsi512_si32(__m512i __A) {
9675  __v16si __b = (__v16si)__A;
9676  return __b[0];
9677}
9678
9679#undef __DEFAULT_FN_ATTRS512
9680#undef __DEFAULT_FN_ATTRS128
9681#undef __DEFAULT_FN_ATTRS
9682
9683#endif /* __AVX512FINTRIN_H */
9684