avx512dqintrin.h revision 309124
1/*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512DQINTRIN_H
29#define __AVX512DQINTRIN_H
30
/* Attribute set attached to every intrinsic function in this file:
   always-inline, no debug info, and the "avx512dq" target feature. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
33
34static __inline__ __m512i __DEFAULT_FN_ATTRS
35_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
36  return (__m512i) ((__v8du) __A * (__v8du) __B);
37}
38
39static __inline__ __m512i __DEFAULT_FN_ATTRS
40_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
41  return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
42              (__v8di) __B,
43              (__v8di) __W,
44              (__mmask8) __U);
45}
46
47static __inline__ __m512i __DEFAULT_FN_ATTRS
48_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) {
49  return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
50              (__v8di) __B,
51              (__v8di)
52              _mm512_setzero_si512 (),
53              (__mmask8) __U);
54}
55
56static __inline__ __m512d __DEFAULT_FN_ATTRS
57_mm512_xor_pd (__m512d __A, __m512d __B) {
58  return (__m512d) ((__v8du) __A ^ (__v8du) __B);
59}
60
61static __inline__ __m512d __DEFAULT_FN_ATTRS
62_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
63  return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
64             (__v8df) __B,
65             (__v8df) __W,
66             (__mmask8) __U);
67}
68
69static __inline__ __m512d __DEFAULT_FN_ATTRS
70_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
71  return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
72             (__v8df) __B,
73             (__v8df)
74             _mm512_setzero_pd (),
75             (__mmask8) __U);
76}
77
78static __inline__ __m512 __DEFAULT_FN_ATTRS
79_mm512_xor_ps (__m512 __A, __m512 __B) {
80  return (__m512) ((__v16su) __A ^ (__v16su) __B);
81}
82
83static __inline__ __m512 __DEFAULT_FN_ATTRS
84_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
85  return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
86            (__v16sf) __B,
87            (__v16sf) __W,
88            (__mmask16) __U);
89}
90
91static __inline__ __m512 __DEFAULT_FN_ATTRS
92_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
93  return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
94            (__v16sf) __B,
95            (__v16sf)
96            _mm512_setzero_ps (),
97            (__mmask16) __U);
98}
99
100static __inline__ __m512d __DEFAULT_FN_ATTRS
101_mm512_or_pd (__m512d __A, __m512d __B) {
102  return (__m512d) ((__v8du) __A | (__v8du) __B);
103}
104
105static __inline__ __m512d __DEFAULT_FN_ATTRS
106_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
107  return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
108            (__v8df) __B,
109            (__v8df) __W,
110            (__mmask8) __U);
111}
112
113static __inline__ __m512d __DEFAULT_FN_ATTRS
114_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
115  return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
116            (__v8df) __B,
117            (__v8df)
118            _mm512_setzero_pd (),
119            (__mmask8) __U);
120}
121
122static __inline__ __m512 __DEFAULT_FN_ATTRS
123_mm512_or_ps (__m512 __A, __m512 __B) {
124  return (__m512) ((__v16su) __A | (__v16su) __B);
125}
126
127static __inline__ __m512 __DEFAULT_FN_ATTRS
128_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
129  return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
130                 (__v16sf) __B,
131                 (__v16sf) __W,
132                 (__mmask16) __U);
133}
134
135static __inline__ __m512 __DEFAULT_FN_ATTRS
136_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
137  return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
138                 (__v16sf) __B,
139                 (__v16sf)
140                 _mm512_setzero_ps (),
141                 (__mmask16) __U);
142}
143
144static __inline__ __m512d __DEFAULT_FN_ATTRS
145_mm512_and_pd (__m512d __A, __m512d __B) {
146  return (__m512d) ((__v8du) __A & (__v8du) __B);
147}
148
149static __inline__ __m512d __DEFAULT_FN_ATTRS
150_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
151  return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
152             (__v8df) __B,
153             (__v8df) __W,
154             (__mmask8) __U);
155}
156
157static __inline__ __m512d __DEFAULT_FN_ATTRS
158_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
159  return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
160             (__v8df) __B,
161             (__v8df)
162             _mm512_setzero_pd (),
163             (__mmask8) __U);
164}
165
166static __inline__ __m512 __DEFAULT_FN_ATTRS
167_mm512_and_ps (__m512 __A, __m512 __B) {
168  return (__m512) ((__v16su) __A & (__v16su) __B);
169}
170
171static __inline__ __m512 __DEFAULT_FN_ATTRS
172_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
173  return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
174            (__v16sf) __B,
175            (__v16sf) __W,
176            (__mmask16) __U);
177}
178
179static __inline__ __m512 __DEFAULT_FN_ATTRS
180_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
181  return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
182            (__v16sf) __B,
183            (__v16sf)
184            _mm512_setzero_ps (),
185            (__mmask16) __U);
186}
187
188static __inline__ __m512d __DEFAULT_FN_ATTRS
189_mm512_andnot_pd (__m512d __A, __m512d __B) {
190  return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
191              (__v8df) __B,
192              (__v8df)
193              _mm512_setzero_pd (),
194              (__mmask8) -1);
195}
196
197static __inline__ __m512d __DEFAULT_FN_ATTRS
198_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
199  return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
200              (__v8df) __B,
201              (__v8df) __W,
202              (__mmask8) __U);
203}
204
205static __inline__ __m512d __DEFAULT_FN_ATTRS
206_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
207  return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
208              (__v8df) __B,
209              (__v8df)
210              _mm512_setzero_pd (),
211              (__mmask8) __U);
212}
213
214static __inline__ __m512 __DEFAULT_FN_ATTRS
215_mm512_andnot_ps (__m512 __A, __m512 __B) {
216  return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
217             (__v16sf) __B,
218             (__v16sf)
219             _mm512_setzero_ps (),
220             (__mmask16) -1);
221}
222
223static __inline__ __m512 __DEFAULT_FN_ATTRS
224_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
225  return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
226             (__v16sf) __B,
227             (__v16sf) __W,
228             (__mmask16) __U);
229}
230
231static __inline__ __m512 __DEFAULT_FN_ATTRS
232_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
233  return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
234             (__v16sf) __B,
235             (__v16sf)
236             _mm512_setzero_ps (),
237             (__mmask16) __U);
238}
239
240static __inline__ __m512i __DEFAULT_FN_ATTRS
241_mm512_cvtpd_epi64 (__m512d __A) {
242  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
243                (__v8di) _mm512_setzero_si512(),
244                (__mmask8) -1,
245                _MM_FROUND_CUR_DIRECTION);
246}
247
248static __inline__ __m512i __DEFAULT_FN_ATTRS
249_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
250  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
251                (__v8di) __W,
252                (__mmask8) __U,
253                _MM_FROUND_CUR_DIRECTION);
254}
255
256static __inline__ __m512i __DEFAULT_FN_ATTRS
257_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
258  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
259                (__v8di) _mm512_setzero_si512(),
260                (__mmask8) __U,
261                _MM_FROUND_CUR_DIRECTION);
262}
263
/* Expands to __builtin_ia32_cvtpd2qq512_mask on A with a zero source vector,
   the mask (__mmask8)-1 and R as the final int operand. */
#define _mm512_cvt_roundpd_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, \
      (int)(R)); \
})
268
/* Expands to __builtin_ia32_cvtpd2qq512_mask on A with W as the source
   operand, U as the mask and R as the final int operand. */
#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
273
/* Expands to __builtin_ia32_cvtpd2qq512_mask on A with a zero source vector,
   U as the mask and R as the final int operand. */
#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), \
      (int)(R)); \
})
278
279static __inline__ __m512i __DEFAULT_FN_ATTRS
280_mm512_cvtpd_epu64 (__m512d __A) {
281  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
282                 (__v8di) _mm512_setzero_si512(),
283                 (__mmask8) -1,
284                 _MM_FROUND_CUR_DIRECTION);
285}
286
287static __inline__ __m512i __DEFAULT_FN_ATTRS
288_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
289  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
290                 (__v8di) __W,
291                 (__mmask8) __U,
292                 _MM_FROUND_CUR_DIRECTION);
293}
294
295static __inline__ __m512i __DEFAULT_FN_ATTRS
296_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
297  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
298                 (__v8di) _mm512_setzero_si512(),
299                 (__mmask8) __U,
300                 _MM_FROUND_CUR_DIRECTION);
301}
302
/* Expands to __builtin_ia32_cvtpd2uqq512_mask on A with a zero source
   vector, the mask (__mmask8)-1 and R as the final int operand. */
#define _mm512_cvt_roundpd_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, \
      (int)(R)); \
})
307
/* Expands to __builtin_ia32_cvtpd2uqq512_mask on A with W as the source
   operand, U as the mask and R as the final int operand. */
#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
312
/* Expands to __builtin_ia32_cvtpd2uqq512_mask on A with a zero source
   vector, U as the mask and R as the final int operand. */
#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), \
      (int)(R)); \
})
317
318static __inline__ __m512i __DEFAULT_FN_ATTRS
319_mm512_cvtps_epi64 (__m256 __A) {
320  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
321                (__v8di) _mm512_setzero_si512(),
322                (__mmask8) -1,
323                _MM_FROUND_CUR_DIRECTION);
324}
325
326static __inline__ __m512i __DEFAULT_FN_ATTRS
327_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
328  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
329                (__v8di) __W,
330                (__mmask8) __U,
331                _MM_FROUND_CUR_DIRECTION);
332}
333
334static __inline__ __m512i __DEFAULT_FN_ATTRS
335_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
336  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
337                (__v8di) _mm512_setzero_si512(),
338                (__mmask8) __U,
339                _MM_FROUND_CUR_DIRECTION);
340}
341
/* Expands to __builtin_ia32_cvtps2qq512_mask on A with a zero source vector,
   the mask (__mmask8)-1 and R as the final int operand. */
#define _mm512_cvt_roundps_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, \
      (int)(R)); \
})
346
/* Expands to __builtin_ia32_cvtps2qq512_mask on A with W as the source
   operand, U as the mask and R as the final int operand. */
#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
351
/* Expands to __builtin_ia32_cvtps2qq512_mask on A with a zero source vector,
   U as the mask and R as the final int operand. */
#define _mm512_maskz_cvt_roundps_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), \
      (int)(R)); \
})
356
357static __inline__ __m512i __DEFAULT_FN_ATTRS
358_mm512_cvtps_epu64 (__m256 __A) {
359  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
360                 (__v8di) _mm512_setzero_si512(),
361                 (__mmask8) -1,
362                 _MM_FROUND_CUR_DIRECTION);
363}
364
365static __inline__ __m512i __DEFAULT_FN_ATTRS
366_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
367  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
368                 (__v8di) __W,
369                 (__mmask8) __U,
370                 _MM_FROUND_CUR_DIRECTION);
371}
372
373static __inline__ __m512i __DEFAULT_FN_ATTRS
374_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
375  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
376                 (__v8di) _mm512_setzero_si512(),
377                 (__mmask8) __U,
378                 _MM_FROUND_CUR_DIRECTION);
379}
380
/* Expands to __builtin_ia32_cvtps2uqq512_mask on A with a zero source
   vector, the mask (__mmask8)-1 and R as the final int operand. */
#define _mm512_cvt_roundps_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, \
      (int)(R)); \
})
385
/* Expands to __builtin_ia32_cvtps2uqq512_mask on A with W as the source
   operand, U as the mask and R as the final int operand. */
#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
390
/* Expands to __builtin_ia32_cvtps2uqq512_mask on A with a zero source
   vector, U as the mask and R as the final int operand. */
#define _mm512_maskz_cvt_roundps_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), \
      (int)(R)); \
})
395
396
397static __inline__ __m512d __DEFAULT_FN_ATTRS
398_mm512_cvtepi64_pd (__m512i __A) {
399  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
400                (__v8df) _mm512_setzero_pd(),
401                (__mmask8) -1,
402                _MM_FROUND_CUR_DIRECTION);
403}
404
405static __inline__ __m512d __DEFAULT_FN_ATTRS
406_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
407  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
408                (__v8df) __W,
409                (__mmask8) __U,
410                _MM_FROUND_CUR_DIRECTION);
411}
412
413static __inline__ __m512d __DEFAULT_FN_ATTRS
414_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
415  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
416                (__v8df) _mm512_setzero_pd(),
417                (__mmask8) __U,
418                _MM_FROUND_CUR_DIRECTION);
419}
420
/* Expands to __builtin_ia32_cvtqq2pd512_mask on A with a zero source vector,
   the mask (__mmask8)-1 and R as the final int operand. */
#define _mm512_cvt_roundepi64_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); \
})
425
/* Expands to __builtin_ia32_cvtqq2pd512_mask on A with W as the source
   operand, U as the mask and R as the final int operand. */
#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
430
/* Expands to __builtin_ia32_cvtqq2pd512_mask on A with a zero source vector,
   U as the mask and R as the final int operand. */
#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); \
})
435
436static __inline__ __m256 __DEFAULT_FN_ATTRS
437_mm512_cvtepi64_ps (__m512i __A) {
438  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
439               (__v8sf) _mm256_setzero_ps(),
440               (__mmask8) -1,
441               _MM_FROUND_CUR_DIRECTION);
442}
443
444static __inline__ __m256 __DEFAULT_FN_ATTRS
445_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
446  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
447               (__v8sf) __W,
448               (__mmask8) __U,
449               _MM_FROUND_CUR_DIRECTION);
450}
451
452static __inline__ __m256 __DEFAULT_FN_ATTRS
453_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
454  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
455               (__v8sf) _mm256_setzero_ps(),
456               (__mmask8) __U,
457               _MM_FROUND_CUR_DIRECTION);
458}
459
/* Expands to __builtin_ia32_cvtqq2ps512_mask on A with a zero 256-bit source
   vector, the mask (__mmask8)-1 and R as the final int operand. */
#define _mm512_cvt_roundepi64_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); \
})
464
/* Expands to __builtin_ia32_cvtqq2ps512_mask on A with W as the source
   operand, U as the mask and R as the final int operand. */
#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
469
/* Expands to __builtin_ia32_cvtqq2ps512_mask on A with a zero 256-bit source
   vector, U as the mask and R as the final int operand. */
#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); \
})
474
475
476static __inline__ __m512i __DEFAULT_FN_ATTRS
477_mm512_cvttpd_epi64 (__m512d __A) {
478  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
479                 (__v8di) _mm512_setzero_si512(),
480                 (__mmask8) -1,
481                 _MM_FROUND_CUR_DIRECTION);
482}
483
484static __inline__ __m512i __DEFAULT_FN_ATTRS
485_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
486  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
487                 (__v8di) __W,
488                 (__mmask8) __U,
489                 _MM_FROUND_CUR_DIRECTION);
490}
491
492static __inline__ __m512i __DEFAULT_FN_ATTRS
493_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
494  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
495                 (__v8di) _mm512_setzero_si512(),
496                 (__mmask8) __U,
497                 _MM_FROUND_CUR_DIRECTION);
498}
499
/* Expands to __builtin_ia32_cvttpd2qq512_mask on A with a zero source
   vector, the mask (__mmask8)-1 and R as the final int operand. */
#define _mm512_cvtt_roundpd_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, \
      (int)(R)); \
})
504
/* Expands to __builtin_ia32_cvttpd2qq512_mask on A with W as the source
   operand, U as the mask and R as the final int operand. */
#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
509
/* Expands to __builtin_ia32_cvttpd2qq512_mask on A with a zero source
   vector, U as the mask and R as the final int operand. */
#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), \
      (int)(R)); \
})
514
515static __inline__ __m512i __DEFAULT_FN_ATTRS
516_mm512_cvttpd_epu64 (__m512d __A) {
517  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
518                  (__v8di) _mm512_setzero_si512(),
519                  (__mmask8) -1,
520                  _MM_FROUND_CUR_DIRECTION);
521}
522
523static __inline__ __m512i __DEFAULT_FN_ATTRS
524_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
525  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
526                  (__v8di) __W,
527                  (__mmask8) __U,
528                  _MM_FROUND_CUR_DIRECTION);
529}
530
531static __inline__ __m512i __DEFAULT_FN_ATTRS
532_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
533  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
534                  (__v8di) _mm512_setzero_si512(),
535                  (__mmask8) __U,
536                  _MM_FROUND_CUR_DIRECTION);
537}
538
/* Expands to __builtin_ia32_cvttpd2uqq512_mask on A with a zero source
   vector, the mask (__mmask8)-1 and R as the final int operand. */
#define _mm512_cvtt_roundpd_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, \
      (int)(R)); \
})
543
/* Expands to __builtin_ia32_cvttpd2uqq512_mask on A with W as the source
   operand, U as the mask and R as the final int operand. */
#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
548
/* Expands to __builtin_ia32_cvttpd2uqq512_mask on A with a zero source
   vector, U as the mask and R as the final int operand. */
#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), \
      (int)(R)); \
})
553
554static __inline__ __m512i __DEFAULT_FN_ATTRS
555_mm512_cvttps_epi64 (__m256 __A) {
556  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
557                 (__v8di) _mm512_setzero_si512(),
558                 (__mmask8) -1,
559                 _MM_FROUND_CUR_DIRECTION);
560}
561
562static __inline__ __m512i __DEFAULT_FN_ATTRS
563_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
564  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
565                 (__v8di) __W,
566                 (__mmask8) __U,
567                 _MM_FROUND_CUR_DIRECTION);
568}
569
570static __inline__ __m512i __DEFAULT_FN_ATTRS
571_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
572  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
573                 (__v8di) _mm512_setzero_si512(),
574                 (__mmask8) __U,
575                 _MM_FROUND_CUR_DIRECTION);
576}
577
/* Expands to __builtin_ia32_cvttps2qq512_mask on A with a zero source
   vector, the mask (__mmask8)-1 and R as the final int operand. */
#define _mm512_cvtt_roundps_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, \
      (int)(R)); \
})
582
/* Expands to __builtin_ia32_cvttps2qq512_mask on A with W as the source
   operand, U as the mask and R as the final int operand. */
#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
587
/* Expands to __builtin_ia32_cvttps2qq512_mask on A with a zero source
   vector, U as the mask and R as the final int operand. */
#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), \
      (int)(R)); \
})
592
593static __inline__ __m512i __DEFAULT_FN_ATTRS
594_mm512_cvttps_epu64 (__m256 __A) {
595  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
596                  (__v8di) _mm512_setzero_si512(),
597                  (__mmask8) -1,
598                  _MM_FROUND_CUR_DIRECTION);
599}
600
601static __inline__ __m512i __DEFAULT_FN_ATTRS
602_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
603  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
604                  (__v8di) __W,
605                  (__mmask8) __U,
606                  _MM_FROUND_CUR_DIRECTION);
607}
608
609static __inline__ __m512i __DEFAULT_FN_ATTRS
610_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
611  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
612                  (__v8di) _mm512_setzero_si512(),
613                  (__mmask8) __U,
614                  _MM_FROUND_CUR_DIRECTION);
615}
616
/* Expands to __builtin_ia32_cvttps2uqq512_mask on A with a zero source
   vector, the mask (__mmask8)-1 and R as the final int operand. */
#define _mm512_cvtt_roundps_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, \
      (int)(R)); \
})
621
/* Expands to __builtin_ia32_cvttps2uqq512_mask on A with W as the source
   operand, U as the mask and R as the final int operand. */
#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
626
/* Expands to __builtin_ia32_cvttps2uqq512_mask on A with a zero source
   vector, U as the mask and R as the final int operand. */
#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), \
      (int)(R)); \
})
631
632static __inline__ __m512d __DEFAULT_FN_ATTRS
633_mm512_cvtepu64_pd (__m512i __A) {
634  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
635                 (__v8df) _mm512_setzero_pd(),
636                 (__mmask8) -1,
637                 _MM_FROUND_CUR_DIRECTION);
638}
639
640static __inline__ __m512d __DEFAULT_FN_ATTRS
641_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
642  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
643                 (__v8df) __W,
644                 (__mmask8) __U,
645                 _MM_FROUND_CUR_DIRECTION);
646}
647
648static __inline__ __m512d __DEFAULT_FN_ATTRS
649_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
650  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
651                 (__v8df) _mm512_setzero_pd(),
652                 (__mmask8) __U,
653                 _MM_FROUND_CUR_DIRECTION);
654}
655
/* Expands to __builtin_ia32_cvtuqq2pd512_mask on A with a zero source
   vector, the mask (__mmask8)-1 and R as the final int operand. */
#define _mm512_cvt_roundepu64_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); \
})
660
/* Expands to __builtin_ia32_cvtuqq2pd512_mask on A with W as the source
   operand, U as the mask and R as the final int operand. */
#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
665
666
/* Expands to __builtin_ia32_cvtuqq2pd512_mask on A with a zero source
   vector, U as the mask and R as the final int operand. */
#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); \
})
671
672
673static __inline__ __m256 __DEFAULT_FN_ATTRS
674_mm512_cvtepu64_ps (__m512i __A) {
675  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
676                (__v8sf) _mm256_setzero_ps(),
677                (__mmask8) -1,
678                _MM_FROUND_CUR_DIRECTION);
679}
680
681static __inline__ __m256 __DEFAULT_FN_ATTRS
682_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
683  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
684                (__v8sf) __W,
685                (__mmask8) __U,
686                _MM_FROUND_CUR_DIRECTION);
687}
688
689static __inline__ __m256 __DEFAULT_FN_ATTRS
690_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
691  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
692                (__v8sf) _mm256_setzero_ps(),
693                (__mmask8) __U,
694                _MM_FROUND_CUR_DIRECTION);
695}
696
/* Expands to __builtin_ia32_cvtuqq2ps512_mask on A with a zero 256-bit
   source vector, the mask (__mmask8)-1 and R as the final int operand. */
#define _mm512_cvt_roundepu64_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); \
})
701
/* Expands to __builtin_ia32_cvtuqq2ps512_mask on A with W as the source
   operand, U as the mask and R as the final int operand. */
#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
706
/* Expands to __builtin_ia32_cvtuqq2ps512_mask on A with a zero 256-bit
   source vector, U as the mask and R as the final int operand. */
#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); \
})
711
/* Expands to __builtin_ia32_rangepd512_mask on A and B with C as the int
   immediate, a zero source vector, the mask (__mmask8)-1 and
   _MM_FROUND_CUR_DIRECTION. */
#define _mm512_range_pd(A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); \
})
718
/* Expands to __builtin_ia32_rangepd512_mask on A and B with C as the int
   immediate, W as the source operand, U as the mask and
   _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_range_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
724
/* Expands to __builtin_ia32_rangepd512_mask on A and B with C as the int
   immediate, a zero source vector, U as the mask and
   _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_range_pd(U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
731
/* Expands to __builtin_ia32_rangepd512_mask on A and B with C as the int
   immediate, a zero source vector, the mask (__mmask8)-1 and R as the
   final int operand. */
#define _mm512_range_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); \
})
737
/* Wraps __builtin_ia32_rangepd512_mask. A and B are passed as __v8df, C as an
 * int, W as the __v8df vector operand, U as the __mmask8 operand and R (cast
 * to int) as the final argument. */
#define _mm512_mask_range_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
743
/* Wraps __builtin_ia32_rangepd512_mask. A and B are passed as __v8df, C as an
 * int, a zeroed vector is supplied as the vector operand, U is the __mmask8
 * operand and R (cast to int) the final argument. */
#define _mm512_maskz_range_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
749
/* Wraps __builtin_ia32_rangeps512_mask. A and B are passed as __v16sf and C
 * as an int. This unmasked form supplies a zeroed vector, an all-ones
 * __mmask16 and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_range_ps(A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION); })
756
/* Wraps __builtin_ia32_rangeps512_mask. A and B are passed as __v16sf, C as
 * an int, W as the __v16sf vector operand and U as the __mmask16 operand; the
 * final argument is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_range_ps(W, U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })
762
/* Wraps __builtin_ia32_rangeps512_mask. A and B are passed as __v16sf, C as
 * an int, a zeroed vector is supplied as the vector operand and U is the
 * __mmask16 operand; the final argument is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_range_ps(U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })
769
/* Wraps __builtin_ia32_rangeps512_mask with a caller-chosen final argument R
 * (cast to int). A and B are passed as __v16sf, C as an int; a zeroed vector
 * and an all-ones __mmask16 are supplied. */
#define _mm512_range_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
775
/* Wraps __builtin_ia32_rangeps512_mask. A and B are passed as __v16sf, C as
 * an int, W as the __v16sf vector operand, U as the __mmask16 operand and R
 * (cast to int) as the final argument. */
#define _mm512_mask_range_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })
781
/* Wraps __builtin_ia32_rangeps512_mask. A and B are passed as __v16sf, C as
 * an int, a zeroed vector is supplied as the vector operand, U is the
 * __mmask16 operand and R (cast to int) the final argument. */
#define _mm512_maskz_range_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
787
/* Wraps __builtin_ia32_rangess128_round_mask. A and B are passed as __v4sf;
 * a zeroed vector and an all-ones __mmask8 are supplied, followed by C and R
 * (both cast to int). */
#define _mm_range_round_ss(A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(C), \
      (int)(R)); })
794
/* _mm_range_round_ss with R fixed to _MM_FROUND_CUR_DIRECTION. */
#define _mm_range_ss(A, B, C) \
  _mm_range_round_ss(A, B, C, _MM_FROUND_CUR_DIRECTION)
796
/* Wraps __builtin_ia32_rangess128_round_mask. A and B are passed as __v4sf,
 * W as the __v4sf vector operand, U as the __mmask8 operand, followed by C
 * and R (both cast to int). */
#define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })
803
/* _mm_mask_range_round_ss with R fixed to _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_range_ss(W, U, A, B, C) \
  _mm_mask_range_round_ss(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)
805
/* Wraps __builtin_ia32_rangess128_round_mask. A and B are passed as __v4sf,
 * a zeroed vector is supplied as the vector operand, U is the __mmask8
 * operand, followed by C and R (both cast to int). */
#define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })
812
/* _mm_maskz_range_round_ss with R fixed to _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_range_ss(U, A, B, C) \
  _mm_maskz_range_round_ss(U, A, B, C, _MM_FROUND_CUR_DIRECTION)
814
/* Wraps __builtin_ia32_rangesd128_round_mask. A and B are passed as __v2df;
 * a zeroed vector and an all-ones __mmask8 are supplied, followed by C and R
 * (both cast to int). */
#define _mm_range_round_sd(A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(C), \
      (int)(R)); })
821
/* _mm_range_round_sd with R fixed to _MM_FROUND_CUR_DIRECTION. */
#define _mm_range_sd(A, B, C) \
  _mm_range_round_sd(A, B, C, _MM_FROUND_CUR_DIRECTION)
823
/* Wraps __builtin_ia32_rangesd128_round_mask. A and B are passed as __v2df,
 * W as the __v2df vector operand, U as the __mmask8 operand, followed by C
 * and R (both cast to int). */
#define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })
830
/* _mm_mask_range_round_sd with R fixed to _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_range_sd(W, U, A, B, C) \
  _mm_mask_range_round_sd(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)
832
/* Wraps __builtin_ia32_rangesd128_round_mask. A and B are passed as __v2df,
 * a zeroed vector is supplied as the vector operand, U is the __mmask8
 * operand, followed by C and R (both cast to int). */
#define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })
839
/* _mm_maskz_range_round_sd with R fixed to _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_range_sd(U, A, B, C) \
  _mm_maskz_range_round_sd(U, A, B, C, _MM_FROUND_CUR_DIRECTION)
841
/* Wraps __builtin_ia32_reducepd512_mask. A is passed as __v8df and B as an
 * int. This unmasked form supplies a zeroed vector, an all-ones __mmask8 and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_reduce_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })
847
/* Wraps __builtin_ia32_reducepd512_mask. A is passed as __v8df, B as an int,
 * W as the __v8df vector operand and U as the __mmask8 operand; the final
 * argument is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_reduce_pd(W, U, A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
853
/* Wraps __builtin_ia32_reducepd512_mask. A is passed as __v8df, B as an int,
 * a zeroed vector is supplied as the vector operand and U is the __mmask8
 * operand; the final argument is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_reduce_pd(U, A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
859
/* Wraps __builtin_ia32_reduceps512_mask. A is passed as __v16sf and B as an
 * int. This unmasked form supplies a zeroed vector, an all-ones __mmask16 and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_reduce_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION); })
865
/* Wraps __builtin_ia32_reduceps512_mask. A is passed as __v16sf, B as an int,
 * W as the __v16sf vector operand and U as the __mmask16 operand; the final
 * argument is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_reduce_ps(W, U, A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })
871
/* Wraps __builtin_ia32_reduceps512_mask. A is passed as __v16sf, B as an int,
 * a zeroed vector is supplied as the vector operand and U is the __mmask16
 * operand; the final argument is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_reduce_ps(U, A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })
877
/* Wraps __builtin_ia32_reducepd512_mask with a caller-chosen final argument R
 * (cast to int). A is passed as __v8df, B as an int; a zeroed vector and an
 * all-ones __mmask8 are supplied. */
#define _mm512_reduce_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
882
/* Wraps __builtin_ia32_reducepd512_mask. A is passed as __v8df, B as an int,
 * W as the __v8df vector operand, U as the __mmask8 operand and R (cast to
 * int) as the final argument. */
#define _mm512_mask_reduce_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
887
/* Wraps __builtin_ia32_reducepd512_mask. A is passed as __v8df, B as an int,
 * a zeroed vector is supplied as the vector operand, U is the __mmask8
 * operand and R (cast to int) the final argument. */
#define _mm512_maskz_reduce_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
892
/* Wraps __builtin_ia32_reduceps512_mask with a caller-chosen final argument R
 * (cast to int). A is passed as __v16sf, B as an int; a zeroed vector and an
 * all-ones __mmask16 are supplied. */
#define _mm512_reduce_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
897
/* Wraps __builtin_ia32_reduceps512_mask. A is passed as __v16sf, B as an int,
 * W as the __v16sf vector operand, U as the __mmask16 operand and R (cast to
 * int) as the final argument. */
#define _mm512_mask_reduce_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })
902
/* Wraps __builtin_ia32_reduceps512_mask. A is passed as __v16sf, B as an int,
 * a zeroed vector is supplied as the vector operand, U is the __mmask16
 * operand and R (cast to int) the final argument. */
#define _mm512_maskz_reduce_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
907
/* Wraps __builtin_ia32_reducess_mask. A and B are passed as __v4sf; a zeroed
 * vector and an all-ones __mmask8 are supplied, followed by C (cast to int)
 * and _MM_FROUND_CUR_DIRECTION. */
#define _mm_reduce_ss(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION); })
913
/* Wraps __builtin_ia32_reducess_mask. A and B are passed as __v4sf, W as the
 * __v4sf vector operand, U as the __mmask8 operand, followed by C (cast to
 * int) and _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION); })
919
/* Wraps __builtin_ia32_reducess_mask. A and B are passed as __v4sf, a zeroed
 * vector is supplied as the vector operand, U is the __mmask8 operand,
 * followed by C (cast to int) and _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION); })
926
/* Wraps __builtin_ia32_reducess_mask. A and B are passed as __v4sf; a zeroed
 * vector and an all-ones __mmask8 are supplied, followed by C and R (both
 * cast to int). */
#define _mm_reduce_round_ss(A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(C), \
      (int)(R)); })
932
/* Wraps __builtin_ia32_reducess_mask. A and B are passed as __v4sf, W as the
 * __v4sf vector operand, U as the __mmask8 operand, followed by C and R (both
 * cast to int). */
#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })
938
/* Wraps __builtin_ia32_reducess_mask. A and B are passed as __v4sf, a zeroed
 * vector is supplied as the vector operand, U is the __mmask8 operand,
 * followed by C and R (both cast to int). */
#define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })
944
/* Wraps __builtin_ia32_reducesd_mask. A and B are passed as __v2df; a zeroed
 * vector and an all-ones __mmask8 are supplied, followed by C (cast to int)
 * and _MM_FROUND_CUR_DIRECTION. */
#define _mm_reduce_sd(A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION); })
951
/* Wraps __builtin_ia32_reducesd_mask. A and B are passed as __v2df, W as the
 * __v2df vector operand, U as the __mmask8 operand, followed by C (cast to
 * int) and _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION); })
957
/* Wraps __builtin_ia32_reducesd_mask. A and B are passed as __v2df, a zeroed
 * vector is supplied as the vector operand, U is the __mmask8 operand,
 * followed by C (cast to int) and _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_reduce_sd(U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION); })
964
/* Wraps __builtin_ia32_reducesd_mask. A and B are passed as __v2df; a zeroed
 * vector and an all-ones __mmask8 are supplied, followed by C and R (both
 * cast to int). */
#define _mm_reduce_round_sd(A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(C), \
      (int)(R)); })
970
/* Wraps __builtin_ia32_reducesd_mask. A and B are passed as __v2df, W as the
 * __v2df vector operand, U as the __mmask8 operand, followed by C and R (both
 * cast to int). */
#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })
976
/* Wraps __builtin_ia32_reducesd_mask. A and B are passed as __v2df, a zeroed
 * vector is supplied as the vector operand, U is the __mmask8 operand,
 * followed by C and R (both cast to int). */
#define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })
982
983static __inline__ __mmask16 __DEFAULT_FN_ATTRS
984_mm512_movepi32_mask (__m512i __A)
985{
986  return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
987}
988
989static __inline__ __m512i __DEFAULT_FN_ATTRS
990_mm512_movm_epi32 (__mmask16 __A)
991{
992  return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
993}
994
995static __inline__ __m512i __DEFAULT_FN_ATTRS
996_mm512_movm_epi64 (__mmask8 __A)
997{
998  return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
999}
1000
1001static __inline__ __mmask8 __DEFAULT_FN_ATTRS
1002_mm512_movepi64_mask (__m512i __A)
1003{
1004  return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
1005}
1006
1007
1008static __inline__ __m512 __DEFAULT_FN_ATTRS
1009_mm512_broadcast_f32x2 (__m128 __A)
1010{
1011  return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
1012                (__v16sf)_mm512_undefined_ps(),
1013                (__mmask16) -1);
1014}
1015
1016static __inline__ __m512 __DEFAULT_FN_ATTRS
1017_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
1018{
1019  return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
1020                (__v16sf)
1021                __O, __M);
1022}
1023
1024static __inline__ __m512 __DEFAULT_FN_ATTRS
1025_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
1026{
1027  return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
1028                (__v16sf)_mm512_setzero_ps (),
1029                __M);
1030}
1031
1032static __inline__ __m512 __DEFAULT_FN_ATTRS
1033_mm512_broadcast_f32x8 (__m256 __A)
1034{
1035  return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
1036                _mm512_undefined_ps(),
1037                (__mmask16) -1);
1038}
1039
1040static __inline__ __m512 __DEFAULT_FN_ATTRS
1041_mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
1042{
1043  return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
1044                (__v16sf)__O,
1045                __M);
1046}
1047
1048static __inline__ __m512 __DEFAULT_FN_ATTRS
1049_mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
1050{
1051  return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
1052                (__v16sf)_mm512_setzero_ps (),
1053                __M);
1054}
1055
1056static __inline__ __m512d __DEFAULT_FN_ATTRS
1057_mm512_broadcast_f64x2 (__m128d __A)
1058{
1059  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
1060                 (__v8df)_mm512_undefined_pd(),
1061                 (__mmask8) -1);
1062}
1063
1064static __inline__ __m512d __DEFAULT_FN_ATTRS
1065_mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
1066{
1067  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
1068                 (__v8df)
1069                 __O, __M);
1070}
1071
1072static __inline__ __m512d __DEFAULT_FN_ATTRS
1073_mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
1074{
1075  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
1076                 (__v8df)_mm512_setzero_ps (),
1077                 __M);
1078}
1079
1080static __inline__ __m512i __DEFAULT_FN_ATTRS
1081_mm512_broadcast_i32x2 (__m128i __A)
1082{
1083  return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
1084                 (__v16si)_mm512_setzero_si512(),
1085                 (__mmask16) -1);
1086}
1087
1088static __inline__ __m512i __DEFAULT_FN_ATTRS
1089_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
1090{
1091  return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
1092                 (__v16si)
1093                 __O, __M);
1094}
1095
1096static __inline__ __m512i __DEFAULT_FN_ATTRS
1097_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
1098{
1099  return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
1100                 (__v16si)_mm512_setzero_si512 (),
1101                 __M);
1102}
1103
1104static __inline__ __m512i __DEFAULT_FN_ATTRS
1105_mm512_broadcast_i32x8 (__m256i __A)
1106{
1107  return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
1108                 (__v16si)_mm512_setzero_si512(),
1109                 (__mmask16) -1);
1110}
1111
1112static __inline__ __m512i __DEFAULT_FN_ATTRS
1113_mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A)
1114{
1115  return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
1116                 (__v16si)__O,
1117                 __M);
1118}
1119
1120static __inline__ __m512i __DEFAULT_FN_ATTRS
1121_mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A)
1122{
1123  return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
1124                 (__v16si)
1125                 _mm512_setzero_si512 (),
1126                 __M);
1127}
1128
1129static __inline__ __m512i __DEFAULT_FN_ATTRS
1130_mm512_broadcast_i64x2 (__m128i __A)
1131{
1132  return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
1133                 (__v8di)_mm512_setzero_si512(),
1134                 (__mmask8) -1);
1135}
1136
1137static __inline__ __m512i __DEFAULT_FN_ATTRS
1138_mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A)
1139{
1140  return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
1141                 (__v8di)
1142                 __O, __M);
1143}
1144
1145static __inline__ __m512i __DEFAULT_FN_ATTRS
1146_mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
1147{
1148  return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
1149                 (__v8di)_mm512_setzero_si512 (),
1150                 __M);
1151}
1152
/* Wraps __builtin_ia32_extractf32x8_mask. A is passed as __v16sf and imm as
 * an int; a zeroed __v8sf and an all-ones __mmask8 are supplied. The result
 * is cast to __m256. */
#define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_extractf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1); })
1157
/* Wraps __builtin_ia32_extractf32x8_mask. A is passed as __v16sf, imm as an
 * int, W as the __v8sf vector operand and U as the __mmask8 operand. The
 * result is cast to __m256. */
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_extractf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)); })
1162
/* Wraps __builtin_ia32_extractf32x8_mask. A is passed as __v16sf, imm as an
 * int, a zeroed __v8sf is supplied as the vector operand and U is the
 * __mmask8 operand. The result is cast to __m256. */
#define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_extractf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U)); })
1167
/* Wraps __builtin_ia32_extractf64x2_512_mask. A is passed as __v8df and imm
 * as an int; a zeroed __v2df and an all-ones __mmask8 are supplied. The
 * result is cast to __m128d. */
#define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_extractf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1); })
1173
/* Wraps __builtin_ia32_extractf64x2_512_mask. A is passed as __v8df, imm as
 * an int, W as the __v2df vector operand and U as the __mmask8 operand. The
 * result is cast to __m128d. */
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_extractf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U)); })
1179
/* Wraps __builtin_ia32_extractf64x2_512_mask. A is passed as __v8df, imm as
 * an int, a zeroed __v2df is supplied as the vector operand and U is the
 * __mmask8 operand. The result is cast to __m128d. */
#define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_extractf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U)); })
1185
/* Wraps __builtin_ia32_extracti32x8_mask. A is passed as __v16si and imm as
 * an int; a zeroed __v8si and an all-ones __mmask8 are supplied. The result
 * is cast to __m256i. */
#define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_extracti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (int)(imm), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1); })
1190
/* Wraps __builtin_ia32_extracti32x8_mask. A is passed as __v16si, imm as an
 * int, W as the __v8si vector operand and U as the __mmask8 operand. The
 * result is cast to __m256i. */
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_extracti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (int)(imm), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)); })
1195
/* Wraps __builtin_ia32_extracti32x8_mask. A is passed as __v16si, imm as an
 * int, a zeroed __v8si is supplied as the vector operand and U is the
 * __mmask8 operand. The result is cast to __m256i. */
#define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_extracti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (int)(imm), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U)); })
1200
/* Wraps __builtin_ia32_extracti64x2_512_mask. A is passed as __v8di and imm
 * as an int; the result of _mm_setzero_di() cast to __v2di and an all-ones
 * __mmask8 are supplied. The result is cast to __m128i. */
#define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(imm), \
      (__v2di)_mm_setzero_di(), \
      (__mmask8)-1); })
1206
/* Wraps __builtin_ia32_extracti64x2_512_mask. A is passed as __v8di, imm as
 * an int, W as the __v2di vector operand and U as the __mmask8 operand. The
 * result is cast to __m128i. */
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(imm), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)); })
1212
/* Wraps __builtin_ia32_extracti64x2_512_mask. A is passed as __v8di, imm as
 * an int, the result of _mm_setzero_di() cast to __v2di is supplied as the
 * vector operand and U is the __mmask8 operand. The result is cast to
 * __m128i. */
#define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(imm), \
      (__v2di)_mm_setzero_di(), \
      (__mmask8)(U)); })
1218
/* Wraps __builtin_ia32_insertf32x8_mask. A is passed as __v16sf, B as __v8sf
 * and imm as an int; a zeroed __v16sf and an all-ones __mmask16 are
 * supplied. */
#define _mm512_insertf32x8(A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_insertf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (__v8sf)(__m256)(B), \
      (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1); })
1224
/* Wraps __builtin_ia32_insertf32x8_mask. A is passed as __v16sf, B as __v8sf,
 * imm as an int, W as the __v16sf vector operand and U as the __mmask16
 * operand. */
#define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_insertf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (__v8sf)(__m256)(B), \
      (int)(imm), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U)); })
1230
/* Wraps __builtin_ia32_insertf32x8_mask. A is passed as __v16sf, B as __v8sf,
 * imm as an int, a zeroed __v16sf is supplied as the vector operand and U is
 * the __mmask16 operand. */
#define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_insertf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (__v8sf)(__m256)(B), \
      (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U)); })
1236
/* Wraps __builtin_ia32_insertf64x2_512_mask. A is passed as __v8df, B as
 * __v2df and imm as an int; a zeroed __v8df and an all-ones __mmask8 are
 * supplied. */
#define _mm512_insertf64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_insertf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(imm), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1); })
1243
/* Wraps __builtin_ia32_insertf64x2_512_mask. A is passed as __v8df, B as
 * __v2df, imm as an int, W as the __v8df vector operand and U as the
 * __mmask8 operand. */
#define _mm512_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_insertf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(imm), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U)); })
1250
/* Wraps __builtin_ia32_insertf64x2_512_mask. A is passed as __v8df, B as
 * __v2df, imm as an int, a zeroed __v8df is supplied as the vector operand
 * and U is the __mmask8 operand. */
#define _mm512_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_insertf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(imm), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U)); })
1257
/* Wraps __builtin_ia32_inserti32x8_mask. A is passed as __v16si, B as __v8si
 * and imm as an int; a zeroed __v16si and an all-ones __mmask16 are
 * supplied. */
#define _mm512_inserti32x8(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (__v8si)(__m256i)(B), \
      (int)(imm), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1); })
1263
/* Wraps __builtin_ia32_inserti32x8_mask. A is passed as __v16si, B as __v8si,
 * imm as an int, W as the __v16si vector operand and U as the __mmask16
 * operand. */
#define _mm512_mask_inserti32x8(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (__v8si)(__m256i)(B), \
      (int)(imm), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U)); })
1269
/* Wraps __builtin_ia32_inserti32x8_mask. A is passed as __v16si, B as __v8si,
 * imm as an int, a zeroed __v16si is supplied as the vector operand and U is
 * the __mmask16 operand. */
#define _mm512_maskz_inserti32x8(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (__v8si)(__m256i)(B), \
      (int)(imm), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U)); })
1275
/* Wraps __builtin_ia32_inserti64x2_512_mask. A is passed as __v8di, B as
 * __v2di and imm as an int; a zeroed __v8di and an all-ones __mmask8 are
 * supplied. */
#define _mm512_inserti64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (__v2di)(__m128i)(B), \
      (int)(imm), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1); })
1282
/* Wraps __builtin_ia32_inserti64x2_512_mask. A is passed as __v8di, B as
 * __v2di, imm as an int, W as the __v8di vector operand and U as the
 * __mmask8 operand. */
#define _mm512_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (__v2di)(__m128i)(B), \
      (int)(imm), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U)); })
1289
/* Wraps __builtin_ia32_inserti64x2_512_mask. A is passed as __v8di, B as
 * __v2di, imm as an int, a zeroed __v8di is supplied as the vector operand
 * and U is the __mmask8 operand. */
#define _mm512_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (__v2di)(__m128i)(B), \
      (int)(imm), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U)); })
1296
/* Wraps __builtin_ia32_fpclassps512_mask. A is passed as __v16sf, imm as an
 * int and U as the __mmask16 operand. The result is cast to __mmask16. */
#define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
  (__mmask16)__builtin_ia32_fpclassps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__mmask16)(U)); })
1300
/* Wraps __builtin_ia32_fpclassps512_mask. A is passed as __v16sf and imm as
 * an int; an all-ones __mmask16 is supplied. The result is cast to
 * __mmask16. */
#define _mm512_fpclass_ps_mask(A, imm) __extension__ ({ \
  (__mmask16)__builtin_ia32_fpclassps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__mmask16)-1); })
1304
/* Wraps __builtin_ia32_fpclasspd512_mask. A is passed as __v8df, imm as an
 * int and U as the __mmask8 operand. The result is cast to __mmask8. */
#define _mm512_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__mmask8)(U)); })
1308
/* Wraps __builtin_ia32_fpclasspd512_mask. A is passed as __v8df and imm as an
 * int; an all-ones __mmask8 is supplied. The result is cast to __mmask8. */
#define _mm512_fpclass_pd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__mmask8)-1); })
1312
/* Wraps __builtin_ia32_fpclasssd_mask. A is passed as __v2df and imm as an
 * int; an all-ones __mmask8 is supplied. The result is cast to __mmask8. */
#define _mm_fpclass_sd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasssd_mask( \
      (__v2df)(__m128d)(A), \
      (int)(imm), \
      (__mmask8)-1); })
1316
/* Wraps __builtin_ia32_fpclasssd_mask. A is passed as __v2df, imm as an int
 * and U as the __mmask8 operand. The result is cast to __mmask8. */
#define _mm_mask_fpclass_sd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasssd_mask( \
      (__v2df)(__m128d)(A), \
      (int)(imm), \
      (__mmask8)(U)); })
1320
/* Wraps __builtin_ia32_fpclassss_mask. A is passed as __v4sf and imm as an
 * int; an all-ones __mmask8 is supplied. The result is cast to __mmask8. */
#define _mm_fpclass_ss_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassss_mask( \
      (__v4sf)(__m128)(A), \
      (int)(imm), \
      (__mmask8)-1); })
1324
/* Wraps __builtin_ia32_fpclassss_mask. A is passed as __v4sf, imm as an int
 * and U as the __mmask8 operand. The result is cast to __mmask8. */
#define _mm_mask_fpclass_ss_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassss_mask( \
      (__v4sf)(__m128)(A), \
      (int)(imm), \
      (__mmask8)(U)); })
1328
1329#undef __DEFAULT_FN_ATTRS
1330
1331#endif
1332