avx512fintrin.h revision 296417
1/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
/* Refuse to compile when this header is included on its own: __IMMINTRIN_H
 * must already be defined (presumably by <immintrin.h>, as the message
 * below instructs). */
#ifndef __IMMINTRIN_H
#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
#endif
26
27#ifndef __AVX512FINTRIN_H
28#define __AVX512FINTRIN_H
29
/* Element-typed 64-byte vector types; the functions below use them for
 * casts when calling builtins. */
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* 64-byte vector types used in the signatures of the functions below. */
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Integer types passed as the mask operand of the *_mask builtins. */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
41
/* Rounding mode macros.  _MM_FROUND_CUR_DIRECTION is the value the
 * non-"_round" wrappers in this file pass as the rounding operand. */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04
48
/* Define the default attributes for the functions in this file:
 * always inlined, no debug info, and built for the "avx512f" target. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
51
52/* Create vectors with repeated elements */
53
54static  __inline __m512i __DEFAULT_FN_ATTRS
55_mm512_setzero_si512(void)
56{
57  return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
58}
59
60static __inline__ __m512d __DEFAULT_FN_ATTRS
61_mm512_undefined_pd()
62{
63  return (__m512d)__builtin_ia32_undef512();
64}
65
66static __inline__ __m512 __DEFAULT_FN_ATTRS
67_mm512_undefined()
68{
69  return (__m512)__builtin_ia32_undef512();
70}
71
72static __inline__ __m512 __DEFAULT_FN_ATTRS
73_mm512_undefined_ps()
74{
75  return (__m512)__builtin_ia32_undef512();
76}
77
78static __inline__ __m512i __DEFAULT_FN_ATTRS
79_mm512_undefined_epi32()
80{
81  return (__m512i)__builtin_ia32_undef512();
82}
83
84static __inline __m512i __DEFAULT_FN_ATTRS
85_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
86{
87  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
88                 (__v16si)
89                 _mm512_setzero_si512 (),
90                 __M);
91}
92
93static __inline __m512i __DEFAULT_FN_ATTRS
94_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
95{
96#ifdef __x86_64__
97  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
98                 (__v8di)
99                 _mm512_setzero_si512 (),
100                 __M);
101#else
102  return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
103                 (__v8di)
104                 _mm512_setzero_si512 (),
105                 __M);
106#endif
107}
108
109static __inline __m512 __DEFAULT_FN_ATTRS
110_mm512_setzero_ps(void)
111{
112  return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
113                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
114}
115static  __inline __m512d __DEFAULT_FN_ATTRS
116_mm512_setzero_pd(void)
117{
118  return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
119}
120
121static __inline __m512 __DEFAULT_FN_ATTRS
122_mm512_set1_ps(float __w)
123{
124  return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
125                   __w, __w, __w, __w, __w, __w, __w, __w  };
126}
127
128static __inline __m512d __DEFAULT_FN_ATTRS
129_mm512_set1_pd(double __w)
130{
131  return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
132}
133
134static __inline __m512i __DEFAULT_FN_ATTRS
135_mm512_set1_epi32(int __s)
136{
137  return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
138                             __s, __s, __s, __s, __s, __s, __s, __s };
139}
140
141static __inline __m512i __DEFAULT_FN_ATTRS
142_mm512_set1_epi64(long long __d)
143{
144  return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
145}
146
147static __inline__ __m512 __DEFAULT_FN_ATTRS
148_mm512_broadcastss_ps(__m128 __X)
149{
150  float __f = __X[0];
151  return (__v16sf){ __f, __f, __f, __f,
152                    __f, __f, __f, __f,
153                    __f, __f, __f, __f,
154                    __f, __f, __f, __f };
155}
156
157static __inline__ __m512d __DEFAULT_FN_ATTRS
158_mm512_broadcastsd_pd(__m128d __X)
159{
160  double __d = __X[0];
161  return (__v8df){ __d, __d, __d, __d,
162                   __d, __d, __d, __d };
163}
164
165/* Cast between vector types */
166
167static __inline __m512d __DEFAULT_FN_ATTRS
168_mm512_castpd256_pd512(__m256d __a)
169{
170  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
171}
172
173static __inline __m512 __DEFAULT_FN_ATTRS
174_mm512_castps256_ps512(__m256 __a)
175{
176  return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
177                                          -1, -1, -1, -1, -1, -1, -1, -1);
178}
179
180static __inline __m128d __DEFAULT_FN_ATTRS
181_mm512_castpd512_pd128(__m512d __a)
182{
183  return __builtin_shufflevector(__a, __a, 0, 1);
184}
185
186static __inline __m128 __DEFAULT_FN_ATTRS
187_mm512_castps512_ps128(__m512 __a)
188{
189  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
190}
191
192/* Bitwise operators */
193static __inline__ __m512i __DEFAULT_FN_ATTRS
194_mm512_and_epi32(__m512i __a, __m512i __b)
195{
196  return __a & __b;
197}
198
199static __inline__ __m512i __DEFAULT_FN_ATTRS
200_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
201{
202  return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
203              (__v16si) __b,
204              (__v16si) __src,
205              (__mmask16) __k);
206}
207static __inline__ __m512i __DEFAULT_FN_ATTRS
208_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
209{
210  return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
211              (__v16si) __b,
212              (__v16si)
213              _mm512_setzero_si512 (),
214              (__mmask16) __k);
215}
216
217static __inline__ __m512i __DEFAULT_FN_ATTRS
218_mm512_and_epi64(__m512i __a, __m512i __b)
219{
220  return __a & __b;
221}
222
223static __inline__ __m512i __DEFAULT_FN_ATTRS
224_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
225{
226  return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
227              (__v8di) __b,
228              (__v8di) __src,
229              (__mmask8) __k);
230}
231static __inline__ __m512i __DEFAULT_FN_ATTRS
232_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
233{
234  return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
235              (__v8di) __b,
236              (__v8di)
237              _mm512_setzero_si512 (),
238              (__mmask8) __k);
239}
240
241static __inline__ __m512i __DEFAULT_FN_ATTRS
242_mm512_andnot_epi32 (__m512i __A, __m512i __B)
243{
244  return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
245              (__v16si) __B,
246              (__v16si)
247              _mm512_setzero_si512 (),
248              (__mmask16) -1);
249}
250
251static __inline__ __m512i __DEFAULT_FN_ATTRS
252_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
253{
254  return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
255              (__v16si) __B,
256              (__v16si) __W,
257              (__mmask16) __U);
258}
259
260static __inline__ __m512i __DEFAULT_FN_ATTRS
261_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
262{
263  return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
264              (__v16si) __B,
265              (__v16si)
266              _mm512_setzero_si512 (),
267              (__mmask16) __U);
268}
269
270static __inline__ __m512i __DEFAULT_FN_ATTRS
271_mm512_andnot_epi64 (__m512i __A, __m512i __B)
272{
273  return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
274              (__v8di) __B,
275              (__v8di)
276              _mm512_setzero_si512 (),
277              (__mmask8) -1);
278}
279
280static __inline__ __m512i __DEFAULT_FN_ATTRS
281_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
282{
283  return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
284              (__v8di) __B,
285              (__v8di) __W, __U);
286}
287
288static __inline__ __m512i __DEFAULT_FN_ATTRS
289_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
290{
291  return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
292              (__v8di) __B,
293              (__v8di)
294              _mm512_setzero_pd (),
295              __U);
296}
297static __inline__ __m512i __DEFAULT_FN_ATTRS
298_mm512_or_epi32(__m512i __a, __m512i __b)
299{
300  return __a | __b;
301}
302
303static __inline__ __m512i __DEFAULT_FN_ATTRS
304_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
305{
306  return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
307              (__v16si) __b,
308              (__v16si) __src,
309              (__mmask16) __k);
310}
311static __inline__ __m512i __DEFAULT_FN_ATTRS
312_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
313{
314  return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
315              (__v16si) __b,
316              (__v16si)
317              _mm512_setzero_si512 (),
318              (__mmask16) __k);
319}
320
321static __inline__ __m512i __DEFAULT_FN_ATTRS
322_mm512_or_epi64(__m512i __a, __m512i __b)
323{
324  return __a | __b;
325}
326
327static __inline__ __m512i __DEFAULT_FN_ATTRS
328_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
329{
330  return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
331              (__v8di) __b,
332              (__v8di) __src,
333              (__mmask8) __k);
334}
335static __inline__ __m512i __DEFAULT_FN_ATTRS
336_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
337{
338  return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
339              (__v8di) __b,
340              (__v8di)
341              _mm512_setzero_si512 (),
342              (__mmask8) __k);
343}
344
345static __inline__ __m512i __DEFAULT_FN_ATTRS
346_mm512_xor_epi32(__m512i __a, __m512i __b)
347{
348  return __a ^ __b;
349}
350
351static __inline__ __m512i __DEFAULT_FN_ATTRS
352_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
353{
354  return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
355              (__v16si) __b,
356              (__v16si) __src,
357              (__mmask16) __k);
358}
359static __inline__ __m512i __DEFAULT_FN_ATTRS
360_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
361{
362  return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
363              (__v16si) __b,
364              (__v16si)
365              _mm512_setzero_si512 (),
366              (__mmask16) __k);
367}
368
369static __inline__ __m512i __DEFAULT_FN_ATTRS
370_mm512_xor_epi64(__m512i __a, __m512i __b)
371{
372  return __a ^ __b;
373}
374
375static __inline__ __m512i __DEFAULT_FN_ATTRS
376_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
377{
378  return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
379              (__v8di) __b,
380              (__v8di) __src,
381              (__mmask8) __k);
382}
383static __inline__ __m512i __DEFAULT_FN_ATTRS
384_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
385{
386  return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
387              (__v8di) __b,
388              (__v8di)
389              _mm512_setzero_si512 (),
390              (__mmask8) __k);
391}
392
393static __inline__ __m512i __DEFAULT_FN_ATTRS
394_mm512_and_si512(__m512i __a, __m512i __b)
395{
396  return __a & __b;
397}
398
399static __inline__ __m512i __DEFAULT_FN_ATTRS
400_mm512_or_si512(__m512i __a, __m512i __b)
401{
402  return __a | __b;
403}
404
405static __inline__ __m512i __DEFAULT_FN_ATTRS
406_mm512_xor_si512(__m512i __a, __m512i __b)
407{
408  return __a ^ __b;
409}
410/* Arithmetic */
411
412static __inline __m512d __DEFAULT_FN_ATTRS
413_mm512_add_pd(__m512d __a, __m512d __b)
414{
415  return __a + __b;
416}
417
418static __inline __m512 __DEFAULT_FN_ATTRS
419_mm512_add_ps(__m512 __a, __m512 __b)
420{
421  return __a + __b;
422}
423
424static __inline __m512d __DEFAULT_FN_ATTRS
425_mm512_mul_pd(__m512d __a, __m512d __b)
426{
427  return __a * __b;
428}
429
430static __inline __m512 __DEFAULT_FN_ATTRS
431_mm512_mul_ps(__m512 __a, __m512 __b)
432{
433  return __a * __b;
434}
435
436static __inline __m512d __DEFAULT_FN_ATTRS
437_mm512_sub_pd(__m512d __a, __m512d __b)
438{
439  return __a - __b;
440}
441
442static __inline __m512 __DEFAULT_FN_ATTRS
443_mm512_sub_ps(__m512 __a, __m512 __b)
444{
445  return __a - __b;
446}
447
448static __inline__ __m512i __DEFAULT_FN_ATTRS
449_mm512_add_epi64 (__m512i __A, __m512i __B)
450{
451  return (__m512i) ((__v8di) __A + (__v8di) __B);
452}
453
454static __inline__ __m512i __DEFAULT_FN_ATTRS
455_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
456{
457  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
458             (__v8di) __B,
459             (__v8di) __W,
460             (__mmask8) __U);
461}
462
463static __inline__ __m512i __DEFAULT_FN_ATTRS
464_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
465{
466  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
467             (__v8di) __B,
468             (__v8di)
469             _mm512_setzero_si512 (),
470             (__mmask8) __U);
471}
472
473static __inline__ __m512i __DEFAULT_FN_ATTRS
474_mm512_sub_epi64 (__m512i __A, __m512i __B)
475{
476  return (__m512i) ((__v8di) __A - (__v8di) __B);
477}
478
479static __inline__ __m512i __DEFAULT_FN_ATTRS
480_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
481{
482  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
483             (__v8di) __B,
484             (__v8di) __W,
485             (__mmask8) __U);
486}
487
488static __inline__ __m512i __DEFAULT_FN_ATTRS
489_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
490{
491  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
492             (__v8di) __B,
493             (__v8di)
494             _mm512_setzero_si512 (),
495             (__mmask8) __U);
496}
497
498static __inline__ __m512i __DEFAULT_FN_ATTRS
499_mm512_add_epi32 (__m512i __A, __m512i __B)
500{
501  return (__m512i) ((__v16si) __A + (__v16si) __B);
502}
503
504static __inline__ __m512i __DEFAULT_FN_ATTRS
505_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
506{
507  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
508             (__v16si) __B,
509             (__v16si) __W,
510             (__mmask16) __U);
511}
512
513static __inline__ __m512i __DEFAULT_FN_ATTRS
514_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
515{
516  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
517             (__v16si) __B,
518             (__v16si)
519             _mm512_setzero_si512 (),
520             (__mmask16) __U);
521}
522
523static __inline__ __m512i __DEFAULT_FN_ATTRS
524_mm512_sub_epi32 (__m512i __A, __m512i __B)
525{
526  return (__m512i) ((__v16si) __A - (__v16si) __B);
527}
528
529static __inline__ __m512i __DEFAULT_FN_ATTRS
530_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
531{
532  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
533             (__v16si) __B,
534             (__v16si) __W,
535             (__mmask16) __U);
536}
537
538static __inline__ __m512i __DEFAULT_FN_ATTRS
539_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
540{
541  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
542             (__v16si) __B,
543             (__v16si)
544             _mm512_setzero_si512 (),
545             (__mmask16) __U);
546}
547
548static  __inline__ __m512d __DEFAULT_FN_ATTRS
549_mm512_max_pd(__m512d __A, __m512d __B)
550{
551  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
552             (__v8df) __B,
553             (__v8df)
554             _mm512_setzero_pd (),
555             (__mmask8) -1,
556             _MM_FROUND_CUR_DIRECTION);
557}
558
559static  __inline__ __m512 __DEFAULT_FN_ATTRS
560_mm512_max_ps(__m512 __A, __m512 __B)
561{
562  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
563            (__v16sf) __B,
564            (__v16sf)
565            _mm512_setzero_ps (),
566            (__mmask16) -1,
567            _MM_FROUND_CUR_DIRECTION);
568}
569
570static __inline__ __m128 __DEFAULT_FN_ATTRS
571_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
572  return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
573                (__v4sf) __B,
574                (__v4sf) __W,
575                (__mmask8) __U,
576                _MM_FROUND_CUR_DIRECTION);
577}
578
579static __inline__ __m128 __DEFAULT_FN_ATTRS
580_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
581  return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
582                (__v4sf) __B,
583                (__v4sf)  _mm_setzero_ps (),
584                (__mmask8) __U,
585                _MM_FROUND_CUR_DIRECTION);
586}
587
/* The _mm_*max_round_ss family: each macro forwards to
 * __builtin_ia32_maxss_round with __R as the rounding operand.  They
 * differ only in the third operand (zero vector or __W) and the mask
 * (all ones or __U).
 * Every macro argument is parenthesized before it is cast or forwarded, so
 * an expression argument (a sum, a ternary, ...) is used as a whole
 * instead of having the cast bind to its first operand only. */
#define _mm_max_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_maxss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })

#define _mm_mask_max_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_maxss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) (__W), (__mmask8) (__U), (__R)); })

#define _mm_maskz_max_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_maxss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
599
600static __inline__ __m128d __DEFAULT_FN_ATTRS
601_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
602  return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
603                (__v2df) __B,
604                (__v2df) __W,
605                (__mmask8) __U,
606                _MM_FROUND_CUR_DIRECTION);
607}
608
609static __inline__ __m128d __DEFAULT_FN_ATTRS
610_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
611  return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
612                (__v2df) __B,
613                (__v2df)  _mm_setzero_pd (),
614                (__mmask8) __U,
615                _MM_FROUND_CUR_DIRECTION);
616}
617
/* The _mm_*max_round_sd family: each macro forwards to
 * __builtin_ia32_maxsd_round with __R as the rounding operand.  They
 * differ only in the third operand (zero vector or __W) and the mask
 * (all ones or __U).
 * Every macro argument is parenthesized before it is cast or forwarded, so
 * an expression argument is used as a whole instead of having the cast
 * bind to its first operand only. */
#define _mm_max_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_maxsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })

#define _mm_mask_max_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_maxsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) (__W), (__mmask8) (__U), (__R)); })

#define _mm_maskz_max_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_maxsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
629
630static __inline __m512i
631__DEFAULT_FN_ATTRS
632_mm512_max_epi32(__m512i __A, __m512i __B)
633{
634  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
635              (__v16si) __B,
636              (__v16si)
637              _mm512_setzero_si512 (),
638              (__mmask16) -1);
639}
640
641static __inline __m512i __DEFAULT_FN_ATTRS
642_mm512_max_epu32(__m512i __A, __m512i __B)
643{
644  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
645              (__v16si) __B,
646              (__v16si)
647              _mm512_setzero_si512 (),
648              (__mmask16) -1);
649}
650
651static __inline __m512i __DEFAULT_FN_ATTRS
652_mm512_max_epi64(__m512i __A, __m512i __B)
653{
654  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
655              (__v8di) __B,
656              (__v8di)
657              _mm512_setzero_si512 (),
658              (__mmask8) -1);
659}
660
661static __inline __m512i __DEFAULT_FN_ATTRS
662_mm512_max_epu64(__m512i __A, __m512i __B)
663{
664  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
665              (__v8di) __B,
666              (__v8di)
667              _mm512_setzero_si512 (),
668              (__mmask8) -1);
669}
670
671static  __inline__ __m512d __DEFAULT_FN_ATTRS
672_mm512_min_pd(__m512d __A, __m512d __B)
673{
674  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
675             (__v8df) __B,
676             (__v8df)
677             _mm512_setzero_pd (),
678             (__mmask8) -1,
679             _MM_FROUND_CUR_DIRECTION);
680}
681
682static  __inline__ __m512 __DEFAULT_FN_ATTRS
683_mm512_min_ps(__m512 __A, __m512 __B)
684{
685  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
686            (__v16sf) __B,
687            (__v16sf)
688            _mm512_setzero_ps (),
689            (__mmask16) -1,
690            _MM_FROUND_CUR_DIRECTION);
691}
692
693static __inline__ __m128 __DEFAULT_FN_ATTRS
694_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
695  return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
696                (__v4sf) __B,
697                (__v4sf) __W,
698                (__mmask8) __U,
699                _MM_FROUND_CUR_DIRECTION);
700}
701
702static __inline__ __m128 __DEFAULT_FN_ATTRS
703_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
704  return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
705                (__v4sf) __B,
706                (__v4sf)  _mm_setzero_ps (),
707                (__mmask8) __U,
708                _MM_FROUND_CUR_DIRECTION);
709}
710
/* The _mm_*min_round_ss family: each macro forwards to
 * __builtin_ia32_minss_round with __R as the rounding operand.  They
 * differ only in the third operand (zero vector or __W) and the mask
 * (all ones or __U).
 * Every macro argument is parenthesized before it is cast or forwarded, so
 * an expression argument is used as a whole instead of having the cast
 * bind to its first operand only. */
#define _mm_min_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_minss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })

#define _mm_mask_min_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_minss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) (__W), (__mmask8) (__U), (__R)); })

#define _mm_maskz_min_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_minss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
722
723static __inline__ __m128d __DEFAULT_FN_ATTRS
724_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
725  return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
726                (__v2df) __B,
727                (__v2df) __W,
728                (__mmask8) __U,
729                _MM_FROUND_CUR_DIRECTION);
730}
731
732static __inline__ __m128d __DEFAULT_FN_ATTRS
733_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
734  return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
735                (__v2df) __B,
736                (__v2df)  _mm_setzero_pd (),
737                (__mmask8) __U,
738                _MM_FROUND_CUR_DIRECTION);
739}
740
/* The _mm_*min_round_sd family: each macro forwards to
 * __builtin_ia32_minsd_round with __R as the rounding operand.  They
 * differ only in the third operand (zero vector or __W) and the mask
 * (all ones or __U).
 * Every macro argument is parenthesized before it is cast or forwarded, so
 * an expression argument is used as a whole instead of having the cast
 * bind to its first operand only. */
#define _mm_min_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_minsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })

#define _mm_mask_min_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_minsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) (__W), (__mmask8) (__U), (__R)); })

#define _mm_maskz_min_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_minsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
752
753static __inline __m512i
754__DEFAULT_FN_ATTRS
755_mm512_min_epi32(__m512i __A, __m512i __B)
756{
757  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
758              (__v16si) __B,
759              (__v16si)
760              _mm512_setzero_si512 (),
761              (__mmask16) -1);
762}
763
764static __inline __m512i __DEFAULT_FN_ATTRS
765_mm512_min_epu32(__m512i __A, __m512i __B)
766{
767  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
768              (__v16si) __B,
769              (__v16si)
770              _mm512_setzero_si512 (),
771              (__mmask16) -1);
772}
773
774static __inline __m512i __DEFAULT_FN_ATTRS
775_mm512_min_epi64(__m512i __A, __m512i __B)
776{
777  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
778              (__v8di) __B,
779              (__v8di)
780              _mm512_setzero_si512 (),
781              (__mmask8) -1);
782}
783
784static __inline __m512i __DEFAULT_FN_ATTRS
785_mm512_min_epu64(__m512i __A, __m512i __B)
786{
787  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
788              (__v8di) __B,
789              (__v8di)
790              _mm512_setzero_si512 (),
791              (__mmask8) -1);
792}
793
794static __inline __m512i __DEFAULT_FN_ATTRS
795_mm512_mul_epi32(__m512i __X, __m512i __Y)
796{
797  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
798              (__v16si) __Y,
799              (__v8di)
800              _mm512_setzero_si512 (),
801              (__mmask8) -1);
802}
803
804static __inline __m512i __DEFAULT_FN_ATTRS
805_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
806{
807  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
808              (__v16si) __Y,
809              (__v8di) __W, __M);
810}
811
812static __inline __m512i __DEFAULT_FN_ATTRS
813_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
814{
815  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
816              (__v16si) __Y,
817              (__v8di)
818              _mm512_setzero_si512 (),
819              __M);
820}
821
822static __inline __m512i __DEFAULT_FN_ATTRS
823_mm512_mul_epu32(__m512i __X, __m512i __Y)
824{
825  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
826               (__v16si) __Y,
827               (__v8di)
828               _mm512_setzero_si512 (),
829               (__mmask8) -1);
830}
831
832static __inline __m512i __DEFAULT_FN_ATTRS
833_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
834{
835  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
836               (__v16si) __Y,
837               (__v8di) __W, __M);
838}
839
840static __inline __m512i __DEFAULT_FN_ATTRS
841_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
842{
843  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
844               (__v16si) __Y,
845               (__v8di)
846               _mm512_setzero_si512 (),
847               __M);
848}
849
850static __inline __m512i __DEFAULT_FN_ATTRS
851_mm512_mullo_epi32 (__m512i __A, __m512i __B)
852{
853  return (__m512i) ((__v16si) __A * (__v16si) __B);
854}
855
856static __inline __m512i __DEFAULT_FN_ATTRS
857_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
858{
859  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
860              (__v16si) __B,
861              (__v16si)
862              _mm512_setzero_si512 (),
863              __M);
864}
865
866static __inline __m512i __DEFAULT_FN_ATTRS
867_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
868{
869  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
870              (__v16si) __B,
871              (__v16si) __W, __M);
872}
873
874static  __inline__ __m512d __DEFAULT_FN_ATTRS
875_mm512_sqrt_pd(__m512d __a)
876{
877  return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
878                                                (__v8df) _mm512_setzero_pd (),
879                                                (__mmask8) -1,
880                                                _MM_FROUND_CUR_DIRECTION);
881}
882
883static  __inline__ __m512 __DEFAULT_FN_ATTRS
884_mm512_sqrt_ps(__m512 __a)
885{
886  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
887                                               (__v16sf) _mm512_setzero_ps (),
888                                               (__mmask16) -1,
889                                               _MM_FROUND_CUR_DIRECTION);
890}
891
892static  __inline__ __m512d __DEFAULT_FN_ATTRS
893_mm512_rsqrt14_pd(__m512d __A)
894{
895  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
896                 (__v8df)
897                 _mm512_setzero_pd (),
898                 (__mmask8) -1);}
899
900static  __inline__ __m512 __DEFAULT_FN_ATTRS
901_mm512_rsqrt14_ps(__m512 __A)
902{
903  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
904                (__v16sf)
905                _mm512_setzero_ps (),
906                (__mmask16) -1);
907}
908
909static  __inline__ __m128 __DEFAULT_FN_ATTRS
910_mm_rsqrt14_ss(__m128 __A, __m128 __B)
911{
912  return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __A,
913             (__v4sf) __B,
914             (__v4sf)
915             _mm_setzero_ps (),
916             (__mmask8) -1);
917}
918
919static  __inline__ __m128d __DEFAULT_FN_ATTRS
920_mm_rsqrt14_sd(__m128d __A, __m128d __B)
921{
922  return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __A,
923              (__v2df) __B,
924              (__v2df)
925              _mm_setzero_pd (),
926              (__mmask8) -1);
927}
928
929static  __inline__ __m512d __DEFAULT_FN_ATTRS
930_mm512_rcp14_pd(__m512d __A)
931{
932  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
933               (__v8df)
934               _mm512_setzero_pd (),
935               (__mmask8) -1);
936}
937
938static  __inline__ __m512 __DEFAULT_FN_ATTRS
939_mm512_rcp14_ps(__m512 __A)
940{
941  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
942              (__v16sf)
943              _mm512_setzero_ps (),
944              (__mmask16) -1);
945}
946static  __inline__ __m128 __DEFAULT_FN_ATTRS
947_mm_rcp14_ss(__m128 __A, __m128 __B)
948{
949  return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __A,
950                 (__v4sf) __B,
951                 (__v4sf)
952                 _mm_setzero_ps (),
953                 (__mmask8) -1);
954}
955
956static  __inline__ __m128d __DEFAULT_FN_ATTRS
957_mm_rcp14_sd(__m128d __A, __m128d __B)
958{
959  return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __A,
960            (__v2df) __B,
961            (__v2df)
962            _mm_setzero_pd (),
963            (__mmask8) -1);
964}
965
966static __inline __m512 __DEFAULT_FN_ATTRS
967_mm512_floor_ps(__m512 __A)
968{
969  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
970                                                  _MM_FROUND_FLOOR,
971                                                  (__v16sf) __A, -1,
972                                                  _MM_FROUND_CUR_DIRECTION);
973}
974
975static __inline __m512d __DEFAULT_FN_ATTRS
976_mm512_floor_pd(__m512d __A)
977{
978  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
979                                                   _MM_FROUND_FLOOR,
980                                                   (__v8df) __A, -1,
981                                                   _MM_FROUND_CUR_DIRECTION);
982}
983
984static __inline __m512 __DEFAULT_FN_ATTRS
985_mm512_ceil_ps(__m512 __A)
986{
987  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
988                                                  _MM_FROUND_CEIL,
989                                                  (__v16sf) __A, -1,
990                                                  _MM_FROUND_CUR_DIRECTION);
991}
992
993static __inline __m512d __DEFAULT_FN_ATTRS
994_mm512_ceil_pd(__m512d __A)
995{
996  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
997                                                   _MM_FROUND_CEIL,
998                                                   (__v8df) __A, -1,
999                                                   _MM_FROUND_CUR_DIRECTION);
1000}
1001
1002static __inline __m512i __DEFAULT_FN_ATTRS
1003_mm512_abs_epi64(__m512i __A)
1004{
1005  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1006             (__v8di)
1007             _mm512_setzero_si512 (),
1008             (__mmask8) -1);
1009}
1010
1011static __inline __m512i __DEFAULT_FN_ATTRS
1012_mm512_abs_epi32(__m512i __A)
1013{
1014  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1015             (__v16si)
1016             _mm512_setzero_si512 (),
1017             (__mmask16) -1);
1018}
1019
1020static __inline__ __m128 __DEFAULT_FN_ATTRS
1021_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1022  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1023                (__v4sf) __B,
1024                (__v4sf) __W,
1025                (__mmask8) __U,
1026                _MM_FROUND_CUR_DIRECTION);
1027}
1028
1029static __inline__ __m128 __DEFAULT_FN_ATTRS
1030_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1031  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1032                (__v4sf) __B,
1033                (__v4sf)  _mm_setzero_ps (),
1034                (__mmask8) __U,
1035                _MM_FROUND_CUR_DIRECTION);
1036}
1037
/* Expands to __builtin_ia32_addss_round(__A, __B, zero vector, all-ones
 * mask, __R).  Arguments are parenthesized so that an expression argument
 * is cast as a whole. */
#define _mm_add_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_addss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })
1041
/* Expands to __builtin_ia32_addss_round(__A, __B, __W, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm_mask_add_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_addss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) (__W), (__mmask8) (__U), (__R)); })
1045
/* Expands to __builtin_ia32_addss_round(__A, __B, zero vector, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm_maskz_add_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_addss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
1049
1050static __inline__ __m128d __DEFAULT_FN_ATTRS
1051_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1052  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1053                (__v2df) __B,
1054                (__v2df) __W,
1055                (__mmask8) __U,
1056                _MM_FROUND_CUR_DIRECTION);
1057}
1058
1059static __inline__ __m128d __DEFAULT_FN_ATTRS
1060_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1061  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1062                (__v2df) __B,
1063                (__v2df)  _mm_setzero_pd (),
1064                (__mmask8) __U,
1065                _MM_FROUND_CUR_DIRECTION);
1066}
/* Expands to __builtin_ia32_addsd_round(__A, __B, zero vector, all-ones
 * mask, __R).  Arguments are parenthesized so that an expression argument
 * is cast as a whole. */
#define _mm_add_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_addsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })
1070
/* Expands to __builtin_ia32_addsd_round(__A, __B, __W, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm_mask_add_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_addsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) (__W), (__mmask8) (__U), (__R)); })
1074
/* Expands to __builtin_ia32_addsd_round(__A, __B, zero vector, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm_maskz_add_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_addsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
1078
1079static __inline__ __m512d __DEFAULT_FN_ATTRS
1080_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1081  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1082             (__v8df) __B,
1083             (__v8df) __W,
1084             (__mmask8) __U,
1085             _MM_FROUND_CUR_DIRECTION);
1086}
1087
1088static __inline__ __m512d __DEFAULT_FN_ATTRS
1089_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1090  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1091             (__v8df) __B,
1092             (__v8df) _mm512_setzero_pd (),
1093             (__mmask8) __U,
1094             _MM_FROUND_CUR_DIRECTION);
1095}
1096
1097static __inline__ __m512 __DEFAULT_FN_ATTRS
1098_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1099  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
1100            (__v16sf) __B,
1101            (__v16sf) __W,
1102            (__mmask16) __U,
1103            _MM_FROUND_CUR_DIRECTION);
1104}
1105
1106static __inline__ __m512 __DEFAULT_FN_ATTRS
1107_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1108  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
1109            (__v16sf) __B,
1110            (__v16sf) _mm512_setzero_ps (),
1111            (__mmask16) __U,
1112            _MM_FROUND_CUR_DIRECTION);
1113}
1114
/* Expands to __builtin_ia32_addpd512_mask(__A, __B, zero vector, all-ones
 * mask, __R).  Arguments are parenthesized so that an expression argument
 * is cast as a whole. */
#define _mm512_add_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_addpd512_mask ((__v8df) (__A), (__v8df) (__B), \
               (__v8df) _mm512_setzero_pd(), (__mmask8) -1, (__R)); })
1118
/* Expands to __builtin_ia32_addpd512_mask(__A, __B, __W, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm512_mask_add_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_addpd512_mask((__v8df) (__A), (__v8df) (__B), \
                (__v8df) (__W), (__mmask8) (__U), (__R)); })
1122
/* Expands to __builtin_ia32_addpd512_mask(__A, __B, zero vector, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm512_maskz_add_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_addpd512_mask ((__v8df) (__A), (__v8df) (__B), \
                (__v8df) _mm512_setzero_pd(), (__mmask8) (__U), (__R)); })
1126
/* Expands to __builtin_ia32_addps512_mask(__A, __B, zero vector, all-ones
 * mask, __R).  Arguments are parenthesized so that an expression argument
 * is cast as a whole. */
#define _mm512_add_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_addps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
                (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, (__R)); })
1130
/* Expands to __builtin_ia32_addps512_mask(__A, __B, __W, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm512_mask_add_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_addps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
                (__v16sf) (__W), (__mmask16) (__U), (__R)); })
1134
/* Expands to __builtin_ia32_addps512_mask(__A, __B, zero vector, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm512_maskz_add_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_addps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
                (__v16sf) _mm512_setzero_ps(), (__mmask16) (__U), (__R)); })
1138
1139static __inline__ __m128 __DEFAULT_FN_ATTRS
1140_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1141  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1142                (__v4sf) __B,
1143                (__v4sf) __W,
1144                (__mmask8) __U,
1145                _MM_FROUND_CUR_DIRECTION);
1146}
1147
1148static __inline__ __m128 __DEFAULT_FN_ATTRS
1149_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1150  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1151                (__v4sf) __B,
1152                (__v4sf)  _mm_setzero_ps (),
1153                (__mmask8) __U,
1154                _MM_FROUND_CUR_DIRECTION);
1155}
/* Expands to __builtin_ia32_subss_round(__A, __B, zero vector, all-ones
 * mask, __R).  Arguments are parenthesized so that an expression argument
 * is cast as a whole. */
#define _mm_sub_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_subss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })
1159
/* Expands to __builtin_ia32_subss_round(__A, __B, __W, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm_mask_sub_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_subss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) (__W), (__mmask8) (__U), (__R)); })
1163
/* Expands to __builtin_ia32_subss_round(__A, __B, zero vector, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm_maskz_sub_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_subss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
1167
1168static __inline__ __m128d __DEFAULT_FN_ATTRS
1169_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1170  return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1171                (__v2df) __B,
1172                (__v2df) __W,
1173                (__mmask8) __U,
1174                _MM_FROUND_CUR_DIRECTION);
1175}
1176
1177static __inline__ __m128d __DEFAULT_FN_ATTRS
1178_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1179  return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1180                (__v2df) __B,
1181                (__v2df)  _mm_setzero_pd (),
1182                (__mmask8) __U,
1183                _MM_FROUND_CUR_DIRECTION);
1184}
1185
/* Expands to __builtin_ia32_subsd_round(__A, __B, zero vector, all-ones
 * mask, __R).  Arguments are parenthesized so that an expression argument
 * is cast as a whole. */
#define _mm_sub_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_subsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })
1189
/* Expands to __builtin_ia32_subsd_round(__A, __B, __W, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm_mask_sub_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_subsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) (__W), (__mmask8) (__U), (__R)); })
1193
/* Expands to __builtin_ia32_subsd_round(__A, __B, zero vector, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm_maskz_sub_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_subsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
1197
1198static __inline__ __m512d __DEFAULT_FN_ATTRS
1199_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1200  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
1201             (__v8df) __B,
1202             (__v8df) __W,
1203             (__mmask8) __U,
1204             _MM_FROUND_CUR_DIRECTION);
1205}
1206
1207static __inline__ __m512d __DEFAULT_FN_ATTRS
1208_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1209  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
1210             (__v8df) __B,
1211             (__v8df)
1212             _mm512_setzero_pd (),
1213             (__mmask8) __U,
1214             _MM_FROUND_CUR_DIRECTION);
1215}
1216
1217static __inline__ __m512 __DEFAULT_FN_ATTRS
1218_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1219  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
1220            (__v16sf) __B,
1221            (__v16sf) __W,
1222            (__mmask16) __U,
1223            _MM_FROUND_CUR_DIRECTION);
1224}
1225
1226static __inline__ __m512 __DEFAULT_FN_ATTRS
1227_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1228  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
1229            (__v16sf) __B,
1230            (__v16sf)
1231            _mm512_setzero_ps (),
1232            (__mmask16) __U,
1233            _MM_FROUND_CUR_DIRECTION);
1234}
1235
/* Expands to __builtin_ia32_subpd512_mask(__A, __B, zero vector, all-ones
 * mask, __R).  Arguments are parenthesized so that an expression argument
 * is cast as a whole. */
#define _mm512_sub_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_subpd512_mask ((__v8df) (__A), (__v8df) (__B), \
             (__v8df) _mm512_setzero_pd(), (__mmask8) -1, (__R)); })
1239
/* Expands to __builtin_ia32_subpd512_mask(__A, __B, __W, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm512_mask_sub_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_subpd512_mask ((__v8df) (__A), (__v8df) (__B), \
             (__v8df) (__W), (__mmask8) (__U), (__R)); })
1243
/* Expands to __builtin_ia32_subpd512_mask(__A, __B, zero vector, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm512_maskz_sub_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_subpd512_mask ((__v8df) (__A), (__v8df) (__B), \
             (__v8df) _mm512_setzero_pd(), (__mmask8) (__U), (__R)); })
1247
/* Expands to __builtin_ia32_subps512_mask(__A, __B, zero vector, all-ones
 * mask, __R).  Arguments are parenthesized so that an expression argument
 * is cast as a whole. */
#define _mm512_sub_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_subps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
            (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, (__R)); })
1251
/* Expands to __builtin_ia32_subps512_mask(__A, __B, __W, __U, __R).
 * The expansion carries no trailing ';', so the macro can be used inside a
 * larger expression; arguments are parenthesized so that an expression
 * argument is cast as a whole. */
#define _mm512_mask_sub_round_ps(__W, __U, __A, __B, __R)  __extension__ ({ \
  (__m512) __builtin_ia32_subps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
            (__v16sf) (__W), (__mmask16) (__U), (__R)); })
1255
/* Expands to __builtin_ia32_subps512_mask(__A, __B, zero vector, __U, __R).
 * The expansion carries no trailing ';', so the macro can be used inside a
 * larger expression; arguments are parenthesized so that an expression
 * argument is cast as a whole. */
#define _mm512_maskz_sub_round_ps(__U, __A, __B, __R)  __extension__ ({ \
  (__m512) __builtin_ia32_subps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
            (__v16sf) _mm512_setzero_ps (), (__mmask16) (__U), (__R)); })
1259
1260static __inline__ __m128 __DEFAULT_FN_ATTRS
1261_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1262  return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
1263                (__v4sf) __B,
1264                (__v4sf) __W,
1265                (__mmask8) __U,
1266                _MM_FROUND_CUR_DIRECTION);
1267}
1268
1269static __inline__ __m128 __DEFAULT_FN_ATTRS
1270_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1271  return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
1272                (__v4sf) __B,
1273                (__v4sf)  _mm_setzero_ps (),
1274                (__mmask8) __U,
1275                _MM_FROUND_CUR_DIRECTION);
1276}
/* Expands to __builtin_ia32_mulss_round(__A, __B, zero vector, all-ones
 * mask, __R).  Arguments are parenthesized so that an expression argument
 * is cast as a whole. */
#define _mm_mul_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_mulss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })
1280
/* Expands to __builtin_ia32_mulss_round(__A, __B, __W, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm_mask_mul_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_mulss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) (__W), (__mmask8) (__U), (__R)); })
1284
/* Expands to __builtin_ia32_mulss_round(__A, __B, zero vector, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm_maskz_mul_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_mulss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
1288
1289static __inline__ __m128d __DEFAULT_FN_ATTRS
1290_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1291  return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
1292                (__v2df) __B,
1293                (__v2df) __W,
1294                (__mmask8) __U,
1295                _MM_FROUND_CUR_DIRECTION);
1296}
1297
1298static __inline__ __m128d __DEFAULT_FN_ATTRS
1299_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1300  return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
1301                (__v2df) __B,
1302                (__v2df)  _mm_setzero_pd (),
1303                (__mmask8) __U,
1304                _MM_FROUND_CUR_DIRECTION);
1305}
1306
/* Expands to __builtin_ia32_mulsd_round(__A, __B, zero vector, all-ones
 * mask, __R).  Arguments are parenthesized so that an expression argument
 * is cast as a whole. */
#define _mm_mul_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_mulsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })
1310
/* Expands to __builtin_ia32_mulsd_round(__A, __B, __W, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm_mask_mul_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_mulsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) (__W), (__mmask8) (__U), (__R)); })
1314
/* Expands to __builtin_ia32_mulsd_round(__A, __B, zero vector, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm_maskz_mul_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_mulsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
1318
1319static __inline__ __m512d __DEFAULT_FN_ATTRS
1320_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1321  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
1322             (__v8df) __B,
1323             (__v8df) __W,
1324             (__mmask8) __U,
1325             _MM_FROUND_CUR_DIRECTION);
1326}
1327
1328static __inline__ __m512d __DEFAULT_FN_ATTRS
1329_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1330  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
1331             (__v8df) __B,
1332             (__v8df)
1333             _mm512_setzero_pd (),
1334             (__mmask8) __U,
1335             _MM_FROUND_CUR_DIRECTION);
1336}
1337
1338static __inline__ __m512 __DEFAULT_FN_ATTRS
1339_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1340  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
1341            (__v16sf) __B,
1342            (__v16sf) __W,
1343            (__mmask16) __U,
1344            _MM_FROUND_CUR_DIRECTION);
1345}
1346
1347static __inline__ __m512 __DEFAULT_FN_ATTRS
1348_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1349  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
1350            (__v16sf) __B,
1351            (__v16sf)
1352            _mm512_setzero_ps (),
1353            (__mmask16) __U,
1354            _MM_FROUND_CUR_DIRECTION);
1355}
1356
/* Expands to __builtin_ia32_mulpd512_mask(__A, __B, zero vector, all-ones
 * mask, __R).  Arguments are parenthesized so that an expression argument
 * is cast as a whole. */
#define _mm512_mul_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) (__A), (__v8df) (__B), \
             (__v8df) _mm512_setzero_pd(), (__mmask8) -1, (__R)); })
1360
/* Expands to __builtin_ia32_mulpd512_mask(__A, __B, __W, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm512_mask_mul_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) (__A), (__v8df) (__B), \
             (__v8df) (__W), (__mmask8) (__U), (__R)); })
1364
/* Expands to __builtin_ia32_mulpd512_mask(__A, __B, zero vector, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm512_maskz_mul_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) (__A), (__v8df) (__B), \
             (__v8df) _mm512_setzero_pd(), (__mmask8) (__U), (__R)); })
1368
/* Expands to __builtin_ia32_mulps512_mask(__A, __B, zero vector, all-ones
 * mask, __R).  Arguments are parenthesized so that an expression argument
 * is cast as a whole. */
#define _mm512_mul_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_mulps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
            (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, (__R)); })
1372
/* Expands to __builtin_ia32_mulps512_mask(__A, __B, __W, __U, __R).
 * The expansion carries no trailing ';', so the macro can be used inside a
 * larger expression; arguments are parenthesized so that an expression
 * argument is cast as a whole. */
#define _mm512_mask_mul_round_ps(__W, __U, __A, __B, __R)  __extension__ ({ \
  (__m512) __builtin_ia32_mulps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
            (__v16sf) (__W), (__mmask16) (__U), (__R)); })
1376
/* Expands to __builtin_ia32_mulps512_mask(__A, __B, zero vector, __U, __R).
 * The expansion carries no trailing ';', so the macro can be used inside a
 * larger expression; arguments are parenthesized so that an expression
 * argument is cast as a whole. */
#define _mm512_maskz_mul_round_ps(__U, __A, __B, __R)  __extension__ ({ \
  (__m512) __builtin_ia32_mulps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
            (__v16sf) _mm512_setzero_ps (), (__mmask16) (__U), (__R)); })
1380
1381static __inline__ __m128 __DEFAULT_FN_ATTRS
1382_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1383  return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
1384                (__v4sf) __B,
1385                (__v4sf) __W,
1386                (__mmask8) __U,
1387                _MM_FROUND_CUR_DIRECTION);
1388}
1389
1390static __inline__ __m128 __DEFAULT_FN_ATTRS
1391_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1392  return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
1393                (__v4sf) __B,
1394                (__v4sf)  _mm_setzero_ps (),
1395                (__mmask8) __U,
1396                _MM_FROUND_CUR_DIRECTION);
1397}
1398
/* Expands to __builtin_ia32_divss_round(__A, __B, zero vector, all-ones
 * mask, __R).  Arguments are parenthesized so that an expression argument
 * is cast as a whole. */
#define _mm_div_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_divss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })
1402
/* Expands to __builtin_ia32_divss_round(__A, __B, __W, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm_mask_div_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_divss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) (__W), (__mmask8) (__U), (__R)); })
1406
/* Expands to __builtin_ia32_divss_round(__A, __B, zero vector, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm_maskz_div_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_divss_round ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
1410
1411static __inline__ __m128d __DEFAULT_FN_ATTRS
1412_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1413  return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
1414                (__v2df) __B,
1415                (__v2df) __W,
1416                (__mmask8) __U,
1417                _MM_FROUND_CUR_DIRECTION);
1418}
1419
1420static __inline__ __m128d __DEFAULT_FN_ATTRS
1421_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1422  return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
1423                (__v2df) __B,
1424                (__v2df)  _mm_setzero_pd (),
1425                (__mmask8) __U,
1426                _MM_FROUND_CUR_DIRECTION);
1427}
1428
/* Expands to __builtin_ia32_divsd_round(__A, __B, zero vector, all-ones
 * mask, __R).  Arguments are parenthesized so that an expression argument
 * is cast as a whole. */
#define _mm_div_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_divsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })
1432
/* Expands to __builtin_ia32_divsd_round(__A, __B, __W, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm_mask_div_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_divsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) (__W), (__mmask8) (__U), (__R)); })
1436
/* Expands to __builtin_ia32_divsd_round(__A, __B, zero vector, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm_maskz_div_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_divsd_round ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
1440
1441static __inline__ __m512d __DEFAULT_FN_ATTRS
1442_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1443  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
1444             (__v8df) __B,
1445             (__v8df) __W,
1446             (__mmask8) __U,
1447             _MM_FROUND_CUR_DIRECTION);
1448}
1449
1450static __inline__ __m512d __DEFAULT_FN_ATTRS
1451_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1452  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
1453             (__v8df) __B,
1454             (__v8df)
1455             _mm512_setzero_pd (),
1456             (__mmask8) __U,
1457             _MM_FROUND_CUR_DIRECTION);
1458}
1459
1460static __inline__ __m512 __DEFAULT_FN_ATTRS
1461_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1462  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
1463            (__v16sf) __B,
1464            (__v16sf) __W,
1465            (__mmask16) __U,
1466            _MM_FROUND_CUR_DIRECTION);
1467}
1468
1469static __inline__ __m512 __DEFAULT_FN_ATTRS
1470_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1471  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
1472            (__v16sf) __B,
1473            (__v16sf)
1474            _mm512_setzero_ps (),
1475            (__mmask16) __U,
1476            _MM_FROUND_CUR_DIRECTION);
1477}
1478
/* Expands to __builtin_ia32_divpd512_mask(__A, __B, zero vector, all-ones
 * mask, __R).  Arguments are parenthesized so that an expression argument
 * is cast as a whole. */
#define _mm512_div_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_divpd512_mask ((__v8df) (__A), (__v8df) (__B), \
             (__v8df) _mm512_setzero_pd(), (__mmask8) -1, (__R)); })
1482
/* Expands to __builtin_ia32_divpd512_mask(__A, __B, __W, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm512_mask_div_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_divpd512_mask ((__v8df) (__A), (__v8df) (__B), \
             (__v8df) (__W), (__mmask8) (__U), (__R)); })
1486
/* Expands to __builtin_ia32_divpd512_mask(__A, __B, zero vector, __U, __R).
 * Arguments are parenthesized so that an expression argument is cast as a
 * whole. */
#define _mm512_maskz_div_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_divpd512_mask ((__v8df) (__A), (__v8df) (__B), \
             (__v8df) _mm512_setzero_pd(), (__mmask8) (__U), (__R)); })
1490
/* Expands to __builtin_ia32_divps512_mask(__A, __B, zero vector, all-ones
 * mask, __R).  Arguments are parenthesized so that an expression argument
 * is cast as a whole. */
#define _mm512_div_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_divps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
            (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, (__R)); })
1494
/* Expands to __builtin_ia32_divps512_mask(__A, __B, __W, __U, __R).
 * The expansion carries no trailing ';', so the macro can be used inside a
 * larger expression; arguments are parenthesized so that an expression
 * argument is cast as a whole. */
#define _mm512_mask_div_round_ps(__W, __U, __A, __B, __R)  __extension__ ({ \
  (__m512) __builtin_ia32_divps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
            (__v16sf) (__W), (__mmask16) (__U), (__R)); })
1498
/* Expands to __builtin_ia32_divps512_mask(__A, __B, zero vector, __U, __R).
 * The expansion carries no trailing ';', so the macro can be used inside a
 * larger expression; arguments are parenthesized so that an expression
 * argument is cast as a whole. */
#define _mm512_maskz_div_round_ps(__U, __A, __B, __R)  __extension__ ({ \
  (__m512) __builtin_ia32_divps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
            (__v16sf) _mm512_setzero_ps (), (__mmask16) (__U), (__R)); })
1502
/* Expands to __builtin_ia32_rndscaleps_mask with A as both the input and the
 * third operand, B as the immediate, mask -1 and rounding argument
 * _MM_FROUND_CUR_DIRECTION.  A is captured in a local so that it is
 * evaluated exactly once even though the builtin receives it twice. */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  const __v16sf __rndscale_src = (__v16sf)(A); \
  (__m512)__builtin_ia32_rndscaleps_mask(__rndscale_src, (B), \
                                         __rndscale_src, \
                                         -1, _MM_FROUND_CUR_DIRECTION); })
1506
/* Expands to __builtin_ia32_rndscalepd_mask with A as both the input and the
 * third operand, B as the immediate, mask -1 and rounding argument
 * _MM_FROUND_CUR_DIRECTION.  A is captured in a local so that it is
 * evaluated exactly once even though the builtin receives it twice. */
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  const __v8df __rndscale_src = (__v8df)(A); \
  (__m512d)__builtin_ia32_rndscalepd_mask(__rndscale_src, (B), \
                                          __rndscale_src, \
                                          -1, _MM_FROUND_CUR_DIRECTION); })
1510
/* Expands to __builtin_ia32_vfmaddpd512_mask(A, B, C, all-ones mask, R). */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(A), (__v8df)(B), \
                                           (__v8df)(C), (__mmask8)-1, \
                                           (R)); })
1515
1516
/* Expands to __builtin_ia32_vfmaddpd512_mask(A, B, C, U, R). */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(A), (__v8df)(B), \
                                           (__v8df)(C), (__mmask8)(U), \
                                           (R)); })
1521
1522
/* Expands to __builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R). */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), \
                                            (R)); })
1527
1528
/* Expands to __builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R). */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), \
                                            (R)); })
1533
1534
/* Expands to __builtin_ia32_vfmaddpd512_mask(A, B, -C, all-ones mask, R);
 * the third operand is negated before the call. */
#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(A), (__v8df)(B), \
                                           -(__v8df)(C), (__mmask8)-1, \
                                           (R)); })
1539
1540
/* Expands to __builtin_ia32_vfmaddpd512_mask(A, B, -C, U, R); the third
 * operand is negated before the call. */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(A), (__v8df)(B), \
                                           -(__v8df)(C), (__mmask8)(U), \
                                           (R)); })
1545
1546
/* Expands to __builtin_ia32_vfmaddpd512_maskz(A, B, -C, U, R); the third
 * operand is negated before the call. */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(A), (__v8df)(B), \
                                            -(__v8df)(C), (__mmask8)(U), \
                                            (R)); })
1551
1552
/* Expands to __builtin_ia32_vfmaddpd512_mask(-A, B, C, all-ones mask, R);
 * the first operand is negated before the call. */
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(A), (__v8df)(B), \
                                           (__v8df)(C), (__mmask8)-1, \
                                           (R)); })
1557
1558
/* Expands to __builtin_ia32_vfmaddpd512_mask3(-A, B, C, U, R); the first
 * operand is negated before the call. */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), \
                                            (R)); })
1563
1564
/* Expands to __builtin_ia32_vfmaddpd512_maskz(-A, B, C, U, R); the first
 * operand is negated before the call. */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), \
                                            (R)); })
1569
1570
/* Expands to __builtin_ia32_vfmaddpd512_mask(-A, B, -C, all-ones mask, R);
 * the first and third operands are negated before the call. */
#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(A), (__v8df)(B), \
                                           -(__v8df)(C), (__mmask8)-1, \
                                           (R)); })
1575
1576
/* Expands to __builtin_ia32_vfmaddpd512_maskz(-A, B, -C, U, R); the first
 * and third operands are negated before the call. */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(A), (__v8df)(B), \
                                            -(__v8df)(C), (__mmask8)(U), \
                                            (R)); })
1581
1582
1583static __inline__ __m512d __DEFAULT_FN_ATTRS
1584_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
1585{
1586  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1587                                                    (__v8df) __B,
1588                                                    (__v8df) __C,
1589                                                    (__mmask8) -1,
1590                                                    _MM_FROUND_CUR_DIRECTION);
1591}
1592
1593static __inline__ __m512d __DEFAULT_FN_ATTRS
1594_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1595{
1596  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1597                                                    (__v8df) __B,
1598                                                    (__v8df) __C,
1599                                                    (__mmask8) __U,
1600                                                    _MM_FROUND_CUR_DIRECTION);
1601}
1602
1603static __inline__ __m512d __DEFAULT_FN_ATTRS
1604_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1605{
1606  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
1607                                                     (__v8df) __B,
1608                                                     (__v8df) __C,
1609                                                     (__mmask8) __U,
1610                                                     _MM_FROUND_CUR_DIRECTION);
1611}
1612
1613static __inline__ __m512d __DEFAULT_FN_ATTRS
1614_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1615{
1616  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
1617                                                     (__v8df) __B,
1618                                                     (__v8df) __C,
1619                                                     (__mmask8) __U,
1620                                                     _MM_FROUND_CUR_DIRECTION);
1621}
1622
1623static __inline__ __m512d __DEFAULT_FN_ATTRS
1624_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
1625{
1626  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1627                                                    (__v8df) __B,
1628                                                    -(__v8df) __C,
1629                                                    (__mmask8) -1,
1630                                                    _MM_FROUND_CUR_DIRECTION);
1631}
1632
1633static __inline__ __m512d __DEFAULT_FN_ATTRS
1634_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1635{
1636  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1637                                                    (__v8df) __B,
1638                                                    -(__v8df) __C,
1639                                                    (__mmask8) __U,
1640                                                    _MM_FROUND_CUR_DIRECTION);
1641}
1642
1643static __inline__ __m512d __DEFAULT_FN_ATTRS
1644_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1645{
1646  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
1647                                                     (__v8df) __B,
1648                                                     -(__v8df) __C,
1649                                                     (__mmask8) __U,
1650                                                     _MM_FROUND_CUR_DIRECTION);
1651}
1652
1653static __inline__ __m512d __DEFAULT_FN_ATTRS
1654_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
1655{
1656  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
1657                                                    (__v8df) __B,
1658                                                    (__v8df) __C,
1659                                                    (__mmask8) -1,
1660                                                    _MM_FROUND_CUR_DIRECTION);
1661}
1662
1663static __inline__ __m512d __DEFAULT_FN_ATTRS
1664_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1665{
1666  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
1667                                                     (__v8df) __B,
1668                                                     (__v8df) __C,
1669                                                     (__mmask8) __U,
1670                                                     _MM_FROUND_CUR_DIRECTION);
1671}
1672
1673static __inline__ __m512d __DEFAULT_FN_ATTRS
1674_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1675{
1676  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
1677                                                     (__v8df) __B,
1678                                                     (__v8df) __C,
1679                                                     (__mmask8) __U,
1680                                                     _MM_FROUND_CUR_DIRECTION);
1681}
1682
1683static __inline__ __m512d __DEFAULT_FN_ATTRS
1684_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
1685{
1686  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
1687                                                    (__v8df) __B,
1688                                                    -(__v8df) __C,
1689                                                    (__mmask8) -1,
1690                                                    _MM_FROUND_CUR_DIRECTION);
1691}
1692
1693static __inline__ __m512d __DEFAULT_FN_ATTRS
1694_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1695{
1696  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
1697                                                     (__v8df) __B,
1698                                                     -(__v8df) __C,
1699                                                     (__mmask8) __U,
1700                                                     _MM_FROUND_CUR_DIRECTION);
1701}
1702
/* Single-precision multiply-add (a*b + c) with a caller-supplied rounding
 * argument: A, B and C go to the vfmaddps512 "mask" builtin with an all-ones
 * 16-bit mask and R as the last argument.  Each macro argument is expanded
 * exactly once.
 * NOTE(review): presumably a macro because R must be a constant -- confirm. */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), \
      (__mmask16)-1, (R)); })
1707
1708
/* Single-precision multiply-add (a*b + c) under write mask U with a
 * caller-supplied rounding argument R, via the vfmaddps512 "mask" builtin.
 * Each macro argument is expanded exactly once.
 * NOTE(review): lanes with a clear bit in U presumably keep A -- confirm. */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), \
      (__mmask16)(U), (R)); })
1713
1714
/* Single-precision multiply-add (a*b + c) under write mask U with a
 * caller-supplied rounding argument R, via the vfmaddps512 "mask3" builtin.
 * Each macro argument is expanded exactly once.
 * NOTE(review): lanes with a clear bit in U presumably keep C -- confirm. */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), \
      (__mmask16)(U), (R)); })
1719
1720
/* Single-precision multiply-add (a*b + c) under mask U with a
 * caller-supplied rounding argument R, via the vfmaddps512 "maskz" builtin.
 * Each macro argument is expanded exactly once.
 * NOTE(review): lanes with a clear bit in U are presumably zeroed -- confirm. */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), \
      (__mmask16)(U), (R)); })
1725
1726
/* Single-precision multiply-subtract (a*b - c) with a caller-supplied
 * rounding argument R.  Implemented by passing -C as the addend of the
 * vfmaddps512 "mask" builtin with an all-ones 16-bit mask.  Each macro
 * argument is expanded exactly once. */
#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(A), (__v16sf)(B), -(__v16sf)(C), \
      (__mmask16)-1, (R)); })
1731
1732
/* Single-precision multiply-subtract (a*b - c) under write mask U with a
 * caller-supplied rounding argument R.  Passes -C as the addend of the
 * vfmaddps512 "mask" builtin.  Each macro argument is expanded exactly once.
 * NOTE(review): lanes with a clear bit in U presumably keep A -- confirm. */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(A), (__v16sf)(B), -(__v16sf)(C), \
      (__mmask16)(U), (R)); })
1737
1738
/* Single-precision multiply-subtract (a*b - c) under mask U with a
 * caller-supplied rounding argument R.  Passes -C as the addend of the
 * vfmaddps512 "maskz" builtin.  Each macro argument is expanded exactly once.
 * NOTE(review): lanes with a clear bit in U are presumably zeroed -- confirm. */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(A), (__v16sf)(B), -(__v16sf)(C), \
      (__mmask16)(U), (R)); })
1743
1744
/* Single-precision negated multiply-add (-(a*b) + c) with a caller-supplied
 * rounding argument R.  Passes -A as the first factor of the vfmaddps512
 * "mask" builtin with an all-ones 16-bit mask.  Each macro argument is
 * expanded exactly once. */
#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      -(__v16sf)(A), (__v16sf)(B), (__v16sf)(C), \
      (__mmask16)-1, (R)); })
1749
1750
/* Single-precision negated multiply-add (-(a*b) + c) under write mask U with
 * a caller-supplied rounding argument R.  Passes -A as the first factor of
 * the vfmaddps512 "mask3" builtin.  Each macro argument is expanded once.
 * NOTE(review): lanes with a clear bit in U presumably keep C -- confirm. */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3( \
      -(__v16sf)(A), (__v16sf)(B), (__v16sf)(C), \
      (__mmask16)(U), (R)); })
1755
1756
/* Single-precision negated multiply-add (-(a*b) + c) under mask U with a
 * caller-supplied rounding argument R.  Passes -A as the first factor of the
 * vfmaddps512 "maskz" builtin.  Each macro argument is expanded once.
 * NOTE(review): lanes with a clear bit in U are presumably zeroed -- confirm. */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(A), (__v16sf)(B), (__v16sf)(C), \
      (__mmask16)(U), (R)); })
1761
1762
/* Single-precision negated multiply-subtract (-(a*b) - c) with a
 * caller-supplied rounding argument R.  Negates both A and C and calls the
 * vfmaddps512 "mask" builtin with an all-ones 16-bit mask.  Each macro
 * argument is expanded exactly once. */
#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      -(__v16sf)(A), (__v16sf)(B), -(__v16sf)(C), \
      (__mmask16)-1, (R)); })
1767
1768
/* Single-precision negated multiply-subtract (-(a*b) - c) under mask U with
 * a caller-supplied rounding argument R.  Negates both A and C and calls the
 * vfmaddps512 "maskz" builtin.  Each macro argument is expanded once.
 * NOTE(review): lanes with a clear bit in U are presumably zeroed -- confirm. */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(A), (__v16sf)(B), -(__v16sf)(C), \
      (__mmask16)(U), (R)); })
1773
1774
1775static __inline__ __m512 __DEFAULT_FN_ATTRS
1776_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
1777{
1778  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1779                                                   (__v16sf) __B,
1780                                                   (__v16sf) __C,
1781                                                   (__mmask16) -1,
1782                                                   _MM_FROUND_CUR_DIRECTION);
1783}
1784
1785static __inline__ __m512 __DEFAULT_FN_ATTRS
1786_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1787{
1788  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1789                                                   (__v16sf) __B,
1790                                                   (__v16sf) __C,
1791                                                   (__mmask16) __U,
1792                                                   _MM_FROUND_CUR_DIRECTION);
1793}
1794
1795static __inline__ __m512 __DEFAULT_FN_ATTRS
1796_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1797{
1798  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
1799                                                    (__v16sf) __B,
1800                                                    (__v16sf) __C,
1801                                                    (__mmask16) __U,
1802                                                    _MM_FROUND_CUR_DIRECTION);
1803}
1804
1805static __inline__ __m512 __DEFAULT_FN_ATTRS
1806_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1807{
1808  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
1809                                                    (__v16sf) __B,
1810                                                    (__v16sf) __C,
1811                                                    (__mmask16) __U,
1812                                                    _MM_FROUND_CUR_DIRECTION);
1813}
1814
1815static __inline__ __m512 __DEFAULT_FN_ATTRS
1816_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
1817{
1818  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1819                                                   (__v16sf) __B,
1820                                                   -(__v16sf) __C,
1821                                                   (__mmask16) -1,
1822                                                   _MM_FROUND_CUR_DIRECTION);
1823}
1824
1825static __inline__ __m512 __DEFAULT_FN_ATTRS
1826_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1827{
1828  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1829                                                   (__v16sf) __B,
1830                                                   -(__v16sf) __C,
1831                                                   (__mmask16) __U,
1832                                                   _MM_FROUND_CUR_DIRECTION);
1833}
1834
1835static __inline__ __m512 __DEFAULT_FN_ATTRS
1836_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1837{
1838  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
1839                                                    (__v16sf) __B,
1840                                                    -(__v16sf) __C,
1841                                                    (__mmask16) __U,
1842                                                    _MM_FROUND_CUR_DIRECTION);
1843}
1844
1845static __inline__ __m512 __DEFAULT_FN_ATTRS
1846_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
1847{
1848  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
1849                                                   (__v16sf) __B,
1850                                                   (__v16sf) __C,
1851                                                   (__mmask16) -1,
1852                                                   _MM_FROUND_CUR_DIRECTION);
1853}
1854
1855static __inline__ __m512 __DEFAULT_FN_ATTRS
1856_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1857{
1858  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
1859                                                    (__v16sf) __B,
1860                                                    (__v16sf) __C,
1861                                                    (__mmask16) __U,
1862                                                    _MM_FROUND_CUR_DIRECTION);
1863}
1864
1865static __inline__ __m512 __DEFAULT_FN_ATTRS
1866_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1867{
1868  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
1869                                                    (__v16sf) __B,
1870                                                    (__v16sf) __C,
1871                                                    (__mmask16) __U,
1872                                                    _MM_FROUND_CUR_DIRECTION);
1873}
1874
1875static __inline__ __m512 __DEFAULT_FN_ATTRS
1876_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
1877{
1878  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
1879                                                   (__v16sf) __B,
1880                                                   -(__v16sf) __C,
1881                                                   (__mmask16) -1,
1882                                                   _MM_FROUND_CUR_DIRECTION);
1883}
1884
1885static __inline__ __m512 __DEFAULT_FN_ATTRS
1886_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1887{
1888  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
1889                                                    (__v16sf) __B,
1890                                                    -(__v16sf) __C,
1891                                                    (__mmask16) __U,
1892                                                    _MM_FROUND_CUR_DIRECTION);
1893}
1894
/* Double-precision multiply with alternating add/subtract of C, with a
 * caller-supplied rounding argument R: forwards A, B and C to the
 * vfmaddsubpd512 "mask" builtin with an all-ones 8-bit mask.  Each macro
 * argument is expanded exactly once.
 * NOTE(review): which lanes add and which subtract is decided by the
 * builtin and is not visible here. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(A), (__v8df)(B), (__v8df)(C), \
      (__mmask8)-1, (R)); })
1899
1900
/* Double-precision multiply with alternating add/subtract of C under write
 * mask U, with a caller-supplied rounding argument R, via the vfmaddsubpd512
 * "mask" builtin.  Each macro argument is expanded exactly once.
 * NOTE(review): lanes with a clear bit in U presumably keep A -- confirm. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(A), (__v8df)(B), (__v8df)(C), \
      (__mmask8)(U), (R)); })
1905
1906
/* Double-precision multiply with alternating add/subtract of C under write
 * mask U, with a caller-supplied rounding argument R, via the vfmaddsubpd512
 * "mask3" builtin.  Each macro argument is expanded exactly once.
 * NOTE(review): lanes with a clear bit in U presumably keep C -- confirm. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3( \
      (__v8df)(A), (__v8df)(B), (__v8df)(C), \
      (__mmask8)(U), (R)); })
1911
1912
/* Double-precision multiply with alternating add/subtract of C under mask U,
 * with a caller-supplied rounding argument R, via the vfmaddsubpd512 "maskz"
 * builtin.  Each macro argument is expanded exactly once.
 * NOTE(review): lanes with a clear bit in U are presumably zeroed -- confirm. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
      (__v8df)(A), (__v8df)(B), (__v8df)(C), \
      (__mmask8)(U), (R)); })
1917
1918
/* Double-precision multiply with alternating subtract/add of C, with a
 * caller-supplied rounding argument R.  Reuses the vfmaddsubpd512 "mask"
 * builtin with -C, which swaps the add and subtract roles of the lanes; the
 * mask is all-ones.  Each macro argument is expanded exactly once. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(A), (__v8df)(B), -(__v8df)(C), \
      (__mmask8)-1, (R)); })
1923
1924
/* Double-precision multiply with alternating subtract/add of C under write
 * mask U, with a caller-supplied rounding argument R.  Reuses the
 * vfmaddsubpd512 "mask" builtin with -C.  Each macro argument is expanded
 * exactly once.
 * NOTE(review): lanes with a clear bit in U presumably keep A -- confirm. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(A), (__v8df)(B), -(__v8df)(C), \
      (__mmask8)(U), (R)); })
1929
1930
/* Double-precision multiply with alternating subtract/add of C under mask U,
 * with a caller-supplied rounding argument R.  Reuses the vfmaddsubpd512
 * "maskz" builtin with -C.  Each macro argument is expanded exactly once.
 * NOTE(review): lanes with a clear bit in U are presumably zeroed -- confirm. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
      (__v8df)(A), (__v8df)(B), -(__v8df)(C), \
      (__mmask8)(U), (R)); })
1935
1936
1937static __inline__ __m512d __DEFAULT_FN_ATTRS
1938_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
1939{
1940  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1941                                                       (__v8df) __B,
1942                                                       (__v8df) __C,
1943                                                       (__mmask8) -1,
1944                                                       _MM_FROUND_CUR_DIRECTION);
1945}
1946
1947static __inline__ __m512d __DEFAULT_FN_ATTRS
1948_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1949{
1950  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1951                                                       (__v8df) __B,
1952                                                       (__v8df) __C,
1953                                                       (__mmask8) __U,
1954                                                       _MM_FROUND_CUR_DIRECTION);
1955}
1956
1957static __inline__ __m512d __DEFAULT_FN_ATTRS
1958_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1959{
1960  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
1961                                                        (__v8df) __B,
1962                                                        (__v8df) __C,
1963                                                        (__mmask8) __U,
1964                                                        _MM_FROUND_CUR_DIRECTION);
1965}
1966
1967static __inline__ __m512d __DEFAULT_FN_ATTRS
1968_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1969{
1970  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
1971                                                        (__v8df) __B,
1972                                                        (__v8df) __C,
1973                                                        (__mmask8) __U,
1974                                                        _MM_FROUND_CUR_DIRECTION);
1975}
1976
1977static __inline__ __m512d __DEFAULT_FN_ATTRS
1978_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
1979{
1980  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1981                                                       (__v8df) __B,
1982                                                       -(__v8df) __C,
1983                                                       (__mmask8) -1,
1984                                                       _MM_FROUND_CUR_DIRECTION);
1985}
1986
1987static __inline__ __m512d __DEFAULT_FN_ATTRS
1988_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1989{
1990  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1991                                                       (__v8df) __B,
1992                                                       -(__v8df) __C,
1993                                                       (__mmask8) __U,
1994                                                       _MM_FROUND_CUR_DIRECTION);
1995}
1996
1997static __inline__ __m512d __DEFAULT_FN_ATTRS
1998_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1999{
2000  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2001                                                        (__v8df) __B,
2002                                                        -(__v8df) __C,
2003                                                        (__mmask8) __U,
2004                                                        _MM_FROUND_CUR_DIRECTION);
2005}
2006
/* Single-precision multiply with alternating add/subtract of C, with a
 * caller-supplied rounding argument R: forwards A, B and C to the
 * vfmaddsubps512 "mask" builtin with an all-ones 16-bit mask.  Each macro
 * argument is expanded exactly once.
 * NOTE(review): which lanes add and which subtract is decided by the
 * builtin and is not visible here. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), \
      (__mmask16)-1, (R)); })
2011
2012
/* Single-precision multiply with alternating add/subtract of C under write
 * mask U, with a caller-supplied rounding argument R, via the vfmaddsubps512
 * "mask" builtin.  Each macro argument is expanded exactly once.
 * NOTE(review): lanes with a clear bit in U presumably keep A -- confirm. */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), \
      (__mmask16)(U), (R)); })
2017
2018
/* Single-precision multiply with alternating add/subtract of C under write
 * mask U, with a caller-supplied rounding argument R, via the vfmaddsubps512
 * "mask3" builtin.  Each macro argument is expanded exactly once.
 * NOTE(review): lanes with a clear bit in U presumably keep C -- confirm. */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask3( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), \
      (__mmask16)(U), (R)); })
2023
2024
/* Single-precision multiply with alternating add/subtract of C under mask U,
 * with a caller-supplied rounding argument R, via the vfmaddsubps512 "maskz"
 * builtin.  Each macro argument is expanded exactly once.
 * NOTE(review): lanes with a clear bit in U are presumably zeroed -- confirm. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), \
      (__mmask16)(U), (R)); })
2029
2030
/* Single-precision multiply with alternating subtract/add of C, with a
 * caller-supplied rounding argument R.  Reuses the vfmaddsubps512 "mask"
 * builtin with -C, which swaps the add and subtract roles of the lanes; the
 * mask is all-ones.  Each macro argument is expanded exactly once. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(A), (__v16sf)(B), -(__v16sf)(C), \
      (__mmask16)-1, (R)); })
2035
2036
/* Single-precision multiply with alternating subtract/add of C under write
 * mask U, with a caller-supplied rounding argument R.  Reuses the
 * vfmaddsubps512 "mask" builtin with -C.  Each macro argument is expanded
 * exactly once.
 * NOTE(review): lanes with a clear bit in U presumably keep A -- confirm. */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(A), (__v16sf)(B), -(__v16sf)(C), \
      (__mmask16)(U), (R)); })
2041
2042
/* Single-precision multiply with alternating subtract/add of C under mask U,
 * with a caller-supplied rounding argument R.  Reuses the vfmaddsubps512
 * "maskz" builtin with -C.  Each macro argument is expanded exactly once.
 * NOTE(review): lanes with a clear bit in U are presumably zeroed -- confirm. */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz( \
      (__v16sf)(A), (__v16sf)(B), -(__v16sf)(C), \
      (__mmask16)(U), (R)); })
2047
2048
2049static __inline__ __m512 __DEFAULT_FN_ATTRS
2050_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
2051{
2052  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2053                                                      (__v16sf) __B,
2054                                                      (__v16sf) __C,
2055                                                      (__mmask16) -1,
2056                                                      _MM_FROUND_CUR_DIRECTION);
2057}
2058
2059static __inline__ __m512 __DEFAULT_FN_ATTRS
2060_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2061{
2062  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2063                                                      (__v16sf) __B,
2064                                                      (__v16sf) __C,
2065                                                      (__mmask16) __U,
2066                                                      _MM_FROUND_CUR_DIRECTION);
2067}
2068
2069static __inline__ __m512 __DEFAULT_FN_ATTRS
2070_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2071{
2072  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2073                                                       (__v16sf) __B,
2074                                                       (__v16sf) __C,
2075                                                       (__mmask16) __U,
2076                                                       _MM_FROUND_CUR_DIRECTION);
2077}
2078
2079static __inline__ __m512 __DEFAULT_FN_ATTRS
2080_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2081{
2082  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2083                                                       (__v16sf) __B,
2084                                                       (__v16sf) __C,
2085                                                       (__mmask16) __U,
2086                                                       _MM_FROUND_CUR_DIRECTION);
2087}
2088
2089static __inline__ __m512 __DEFAULT_FN_ATTRS
2090_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
2091{
2092  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2093                                                      (__v16sf) __B,
2094                                                      -(__v16sf) __C,
2095                                                      (__mmask16) -1,
2096                                                      _MM_FROUND_CUR_DIRECTION);
2097}
2098
2099static __inline__ __m512 __DEFAULT_FN_ATTRS
2100_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2101{
2102  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2103                                                      (__v16sf) __B,
2104                                                      -(__v16sf) __C,
2105                                                      (__mmask16) __U,
2106                                                      _MM_FROUND_CUR_DIRECTION);
2107}
2108
2109static __inline__ __m512 __DEFAULT_FN_ATTRS
2110_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2111{
2112  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2113                                                       (__v16sf) __B,
2114                                                       -(__v16sf) __C,
2115                                                       (__mmask16) __U,
2116                                                       _MM_FROUND_CUR_DIRECTION);
2117}
2118
/* Double-precision multiply-subtract under write mask U with a
 * caller-supplied rounding argument R.  Calls the dedicated vfmsubpd512
 * "mask3" builtin with A, B and C unmodified (no operand is negated here).
 * Each macro argument is expanded exactly once.
 * NOTE(review): C is presumably left un-negated because it doubles as the
 * pass-through value for masked-off lanes -- confirm. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3( \
      (__v8df)(A), (__v8df)(B), (__v8df)(C), \
      (__mmask8)(U), (R)); })
2123
2124
2125static __inline__ __m512d __DEFAULT_FN_ATTRS
2126_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2127{
2128  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2129                                                     (__v8df) __B,
2130                                                     (__v8df) __C,
2131                                                     (__mmask8) __U,
2132                                                     _MM_FROUND_CUR_DIRECTION);
2133}
2134
/* Single-precision multiply-subtract under write mask U with a
 * caller-supplied rounding argument R.  Calls the dedicated vfmsubps512
 * "mask3" builtin with A, B and C unmodified (no operand is negated here).
 * Each macro argument is expanded exactly once.
 * NOTE(review): C is presumably left un-negated because it doubles as the
 * pass-through value for masked-off lanes -- confirm. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubps512_mask3( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), \
      (__mmask16)(U), (R)); })
2139
2140
2141static __inline__ __m512 __DEFAULT_FN_ATTRS
2142_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2143{
2144  return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2145                                                    (__v16sf) __B,
2146                                                    (__v16sf) __C,
2147                                                    (__mmask16) __U,
2148                                                    _MM_FROUND_CUR_DIRECTION);
2149}
2150
/* Double-precision multiply with alternating subtract/add under write mask U
 * and a caller-supplied rounding argument R.  Calls the dedicated
 * vfmsubaddpd512 "mask3" builtin with A, B and C unmodified.  Each macro
 * argument is expanded exactly once.
 * NOTE(review): lanes with a clear bit in U presumably keep C -- confirm. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3( \
      (__v8df)(A), (__v8df)(B), (__v8df)(C), \
      (__mmask8)(U), (R)); })
2155
2156
2157static __inline__ __m512d __DEFAULT_FN_ATTRS
2158_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2159{
2160  return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
2161                                                        (__v8df) __B,
2162                                                        (__v8df) __C,
2163                                                        (__mmask8) __U,
2164                                                        _MM_FROUND_CUR_DIRECTION);
2165}
2166
/* Single-precision multiply with alternating subtract/add under write mask U
 * and a caller-supplied rounding argument R.  Calls the dedicated
 * vfmsubaddps512 "mask3" builtin with A, B and C unmodified.  Each macro
 * argument is expanded exactly once.
 * NOTE(review): lanes with a clear bit in U presumably keep C -- confirm. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubaddps512_mask3( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), \
      (__mmask16)(U), (R)); })
2171
2172
2173static __inline__ __m512 __DEFAULT_FN_ATTRS
2174_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2175{
2176  return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
2177                                                       (__v16sf) __B,
2178                                                       (__v16sf) __C,
2179                                                       (__mmask16) __U,
2180                                                       _MM_FROUND_CUR_DIRECTION);
2181}
2182
/* Double-precision negated multiply-add under write mask U with a
 * caller-supplied rounding argument R.  Calls the dedicated vfnmaddpd512
 * "mask" builtin with A, B and C unmodified (no operand is negated here).
 * Each macro argument is expanded exactly once.
 * NOTE(review): A is presumably left un-negated because it doubles as the
 * pass-through value for masked-off lanes -- confirm. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmaddpd512_mask( \
      (__v8df)(A), (__v8df)(B), (__v8df)(C), \
      (__mmask8)(U), (R)); })
2187
2188
2189static __inline__ __m512d __DEFAULT_FN_ATTRS
2190_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2191{
2192  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
2193                                                     (__v8df) __B,
2194                                                     (__v8df) __C,
2195                                                     (__mmask8) __U,
2196                                                     _MM_FROUND_CUR_DIRECTION);
2197}
2198
/* Single-precision negated multiply-add under write mask U with a
 * caller-supplied rounding argument R.  Calls the dedicated vfnmaddps512
 * "mask" builtin with A, B and C unmodified (no operand is negated here).
 * Each macro argument is expanded exactly once.
 * NOTE(review): A is presumably left un-negated because it doubles as the
 * pass-through value for masked-off lanes -- confirm. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmaddps512_mask( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), \
      (__mmask16)(U), (R)); })
2203
2204
2205static __inline__ __m512 __DEFAULT_FN_ATTRS
2206_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2207{
2208  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
2209                                                    (__v16sf) __B,
2210                                                    (__v16sf) __C,
2211                                                    (__mmask16) __U,
2212                                                    _MM_FROUND_CUR_DIRECTION);
2213}
2214
/* Double-precision negated multiply-subtract under write mask U with a
 * caller-supplied rounding argument R.  Calls the dedicated vfnmsubpd512
 * "mask" builtin with A, B and C unmodified.  Each macro argument is
 * expanded exactly once.
 * NOTE(review): lanes with a clear bit in U presumably keep A -- confirm. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask( \
      (__v8df)(A), (__v8df)(B), (__v8df)(C), \
      (__mmask8)(U), (R)); })
2219
2220
/* Double-precision negated multiply-subtract under write mask U with a
 * caller-supplied rounding argument R.  Calls the dedicated vfnmsubpd512
 * "mask3" builtin with A, B and C unmodified.  Each macro argument is
 * expanded exactly once.
 * NOTE(review): lanes with a clear bit in U presumably keep C -- confirm. */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask3( \
      (__v8df)(A), (__v8df)(B), (__v8df)(C), \
      (__mmask8)(U), (R)); })
2225
2226
2227static __inline__ __m512d __DEFAULT_FN_ATTRS
2228_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2229{
2230  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
2231                                                     (__v8df) __B,
2232                                                     (__v8df) __C,
2233                                                     (__mmask8) __U,
2234                                                     _MM_FROUND_CUR_DIRECTION);
2235}
2236
2237static __inline__ __m512d __DEFAULT_FN_ATTRS
2238_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2239{
2240  return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
2241                                                      (__v8df) __B,
2242                                                      (__v8df) __C,
2243                                                      (__mmask8) __U,
2244                                                      _MM_FROUND_CUR_DIRECTION);
2245}
2246
/* Single-precision negated multiply-subtract under write mask U with a
 * caller-supplied rounding argument R.  Calls the dedicated vfnmsubps512
 * "mask" builtin with A, B and C unmodified.  Each macro argument is
 * expanded exactly once.
 * NOTE(review): lanes with a clear bit in U presumably keep A -- confirm. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), \
      (__mmask16)(U), (R)); })
2251
2252
/* Single-precision negated multiply-subtract under write mask U with a
 * caller-supplied rounding argument R.  Calls the dedicated vfnmsubps512
 * "mask3" builtin with A, B and C unmodified.  Each macro argument is
 * expanded exactly once.
 * NOTE(review): lanes with a clear bit in U presumably keep C -- confirm. */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask3( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), \
      (__mmask16)(U), (R)); })
2257
2258
2259static __inline__ __m512 __DEFAULT_FN_ATTRS
2260_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2261{
2262  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
2263                                                    (__v16sf) __B,
2264                                                    (__v16sf) __C,
2265                                                    (__mmask16) __U,
2266                                                    _MM_FROUND_CUR_DIRECTION);
2267}
2268
2269static __inline__ __m512 __DEFAULT_FN_ATTRS
2270_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2271{
2272  return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
2273                                                     (__v16sf) __B,
2274                                                     (__v16sf) __C,
2275                                                     (__mmask16) __U,
2276                                                     _MM_FROUND_CUR_DIRECTION);
2277}
2278
2279
2280
2281/* Vector permutations */
2282
2283static __inline __m512i __DEFAULT_FN_ATTRS
2284_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
2285{
2286  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
2287                                                       /* idx */ ,
2288                                                       (__v16si) __A,
2289                                                       (__v16si) __B,
2290                                                       (__mmask16) -1);
2291}
2292static __inline __m512i __DEFAULT_FN_ATTRS
2293_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
2294{
2295  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
2296                                                       /* idx */ ,
2297                                                       (__v8di) __A,
2298                                                       (__v8di) __B,
2299                                                       (__mmask8) -1);
2300}
2301
2302static __inline __m512d __DEFAULT_FN_ATTRS
2303_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
2304{
2305  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
2306                                                        /* idx */ ,
2307                                                        (__v8df) __A,
2308                                                        (__v8df) __B,
2309                                                        (__mmask8) -1);
2310}
2311static __inline __m512 __DEFAULT_FN_ATTRS
2312_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
2313{
2314  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
2315                                                       /* idx */ ,
2316                                                       (__v16sf) __A,
2317                                                       (__v16sf) __B,
2318                                                       (__mmask16) -1);
2319}
2320
/* Unmasked form: A and B (as 8 x long long) and the immediate I are passed to
 * __builtin_ia32_alignq512_mask together with a zero vector and an all-ones
 * 8-bit mask. */
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignq512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (I), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1); })
2326
/* Unmasked form: A and B (as 16 x int) and the immediate I are passed to
 * __builtin_ia32_alignd512_mask together with a zero vector and an all-ones
 * 16-bit mask. */
#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignd512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (I), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1); })
2332
2333/* Vector Extract */
2334
/* Unmasked form: A (as 8 x double) and the immediate I are passed to
 * __builtin_ia32_extractf64x4_mask with a zero fill vector and an all-ones
 * mask; the result is a __m256d.
 * The fill vector comes from _mm256_setzero_pd() so it already has double
 * elements, instead of reinterpreting an integer zero vector; this matches
 * how _mm512_extractf32x4_ps builds its fill vector with _mm_setzero_ps(). */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({                    \
      (__m256d)                                                          \
        __builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A),           \
                                         (I),                            \
                                         (__v4df)_mm256_setzero_pd(),    \
                                         (__mmask8) -1); })
2341
/* Unmasked form: A (as 16 x float) and the immediate I are passed to
 * __builtin_ia32_extractf32x4_mask with a zero __v4sf fill vector and an
 * all-ones 8-bit mask; the result is a __m128. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
  (__m128) __builtin_ia32_extractf32x4_mask( \
      (__v16sf)(__m512)(A), \
      (I), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8) -1); })
2348
2349/* Vector Blend */
2350
2351static __inline __m512d __DEFAULT_FN_ATTRS
2352_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
2353{
2354  return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
2355                 (__v8df) __W,
2356                 (__mmask8) __U);
2357}
2358
2359static __inline __m512 __DEFAULT_FN_ATTRS
2360_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
2361{
2362  return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
2363                (__v16sf) __W,
2364                (__mmask16) __U);
2365}
2366
2367static __inline __m512i __DEFAULT_FN_ATTRS
2368_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
2369{
2370  return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
2371                (__v8di) __W,
2372                (__mmask8) __U);
2373}
2374
2375static __inline __m512i __DEFAULT_FN_ATTRS
2376_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
2377{
2378  return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
2379                (__v16si) __W,
2380                (__mmask16) __U);
2381}
2382
2383/* Compare */
2384
/* Unmasked compare: A and B (as 16 x float), predicate P and rounding
 * immediate R go to __builtin_ia32_cmpps512_mask with all 16 mask bits set. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (P), \
      (__mmask16)-1, \
      (R)); })
2389
/* Masked compare: A and B (as 16 x float), predicate P and rounding
 * immediate R go to __builtin_ia32_cmpps512_mask with U as the mask operand. */
#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (P), \
      (__mmask16)(U), \
      (R)); })
2394
/* Shorthand for _mm512_cmp_round_ps_mask with the rounding argument fixed to
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), \
                           (B), \
                           (P), \
                           _MM_FROUND_CUR_DIRECTION)
2397
/* Shorthand for _mm512_mask_cmp_round_ps_mask with the rounding argument
 * fixed to _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), \
                                (A), \
                                (B), \
                                (P), \
                                _MM_FROUND_CUR_DIRECTION)
2400
/* Unmasked compare: A and B (as 8 x double), predicate P and rounding
 * immediate R go to __builtin_ia32_cmppd512_mask with all 8 mask bits set. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (P), \
      (__mmask8)-1, \
      (R)); })
2405
/* Masked compare: A and B (as 8 x double), predicate P and rounding
 * immediate R go to __builtin_ia32_cmppd512_mask with U as the mask operand. */
#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (P), \
      (__mmask8)(U), \
      (R)); })
2410
/* Shorthand for _mm512_cmp_round_pd_mask with the rounding argument fixed to
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), \
                           (B), \
                           (P), \
                           _MM_FROUND_CUR_DIRECTION)
2413
/* Shorthand for _mm512_mask_cmp_round_pd_mask with the rounding argument
 * fixed to _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), \
                                (A), \
                                (B), \
                                (P), \
                                _MM_FROUND_CUR_DIRECTION)
2416
2417/* Conversion */
2418
2419static __inline __m512i __DEFAULT_FN_ATTRS
2420_mm512_cvttps_epu32(__m512 __A)
2421{
2422  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
2423                  (__v16si)
2424                  _mm512_setzero_si512 (),
2425                  (__mmask16) -1,
2426                  _MM_FROUND_CUR_DIRECTION);
2427}
2428
/* Unmasked form: A (as 16 x int) and rounding immediate R go to
 * __builtin_ia32_cvtdq2ps512_mask with a zero float vector and all 16 mask
 * bits set. */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (R)); })
2433
/* Unmasked form: A (as 16 x int) and rounding immediate R go to
 * __builtin_ia32_cvtudq2ps512_mask with a zero float vector and all 16 mask
 * bits set. */
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (R)); })
2438
2439static __inline __m512d __DEFAULT_FN_ATTRS
2440_mm512_cvtepi32_pd(__m256i __A)
2441{
2442  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
2443                (__v8df)
2444                _mm512_setzero_pd (),
2445                (__mmask8) -1);
2446}
2447
2448static __inline __m512d __DEFAULT_FN_ATTRS
2449_mm512_cvtepu32_pd(__m256i __A)
2450{
2451  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
2452                (__v8df)
2453                _mm512_setzero_pd (),
2454                (__mmask8) -1);
2455}
2456
/* Unmasked form: A (as 8 x double) and rounding immediate R go to
 * __builtin_ia32_cvtpd2ps512_mask with a zero 8 x float vector and all 8
 * mask bits set; the result is a __m256. */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1, \
      (R)); })
2461
/* Unmasked form: A (as 16 x float) and the immediate I go to
 * __builtin_ia32_vcvtps2ph512_mask with a zero 16 x short vector; the result
 * is a __m256i.
 * The all-ones mask is spelled (__mmask16)-1, one bit per source lane, so the
 * mask width is explicit like in the other unmasked wrappers here. */
#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(A), (I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)-1); })
2466
2467static  __inline __m512 __DEFAULT_FN_ATTRS
2468_mm512_cvtph_ps(__m256i __A)
2469{
2470  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
2471                (__v16sf)
2472                _mm512_setzero_ps (),
2473                (__mmask16) -1,
2474                _MM_FROUND_CUR_DIRECTION);
2475}
2476
2477static __inline __m512i __DEFAULT_FN_ATTRS
2478_mm512_cvttps_epi32(__m512 __a)
2479{
2480  return (__m512i)
2481    __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
2482                                     (__v16si) _mm512_setzero_si512 (),
2483                                     (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
2484}
2485
2486static __inline __m256i __DEFAULT_FN_ATTRS
2487_mm512_cvttpd_epi32(__m512d __a)
2488{
2489  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
2490                                                   (__v8si)_mm256_setzero_si256(),
2491                                                   (__mmask8) -1,
2492                                                    _MM_FROUND_CUR_DIRECTION);
2493}
2494
/* Unmasked form: A (as 8 x double) and rounding immediate R go to
 * __builtin_ia32_cvttpd2dq512_mask with a zero 8 x int vector and all 8 mask
 * bits set. */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, \
      (R)); })
2499
/* Unmasked form: A (as 16 x float) and rounding immediate R go to
 * __builtin_ia32_cvttps2dq512_mask with a zero 16 x int vector and all 16
 * mask bits set. */
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      (R)); })
2504
/* Unmasked form: A (as 16 x float) and rounding immediate R go to
 * __builtin_ia32_cvtps2dq512_mask with a zero 16 x int vector and all 16
 * mask bits set. */
#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      (R)); })
2509
/* Unmasked form: A (as 8 x double) and rounding immediate R go to
 * __builtin_ia32_cvtpd2dq512_mask with a zero 8 x int vector and all 8 mask
 * bits set. */
#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, \
      (R)); })
2514
/* Unmasked form: A (as 16 x float) and rounding immediate R go to
 * __builtin_ia32_cvtps2udq512_mask with a zero 16 x int vector and all 16
 * mask bits set. */
#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      (R)); })
2519
/* Unmasked form: A (as 8 x double) and rounding immediate R go to
 * __builtin_ia32_cvtpd2udq512_mask with a zero 8 x int vector and all 8 mask
 * bits set. */
#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8) -1, \
      (R)); })
2524
2525/* Unpack and Interleave */
2526static __inline __m512d __DEFAULT_FN_ATTRS
2527_mm512_unpackhi_pd(__m512d __a, __m512d __b)
2528{
2529  return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
2530}
2531
2532static __inline __m512d __DEFAULT_FN_ATTRS
2533_mm512_unpacklo_pd(__m512d __a, __m512d __b)
2534{
2535  return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
2536}
2537
2538static __inline __m512 __DEFAULT_FN_ATTRS
2539_mm512_unpackhi_ps(__m512 __a, __m512 __b)
2540{
2541  return __builtin_shufflevector(__a, __b,
2542                                 2,    18,    3,    19,
2543                                 2+4,  18+4,  3+4,  19+4,
2544                                 2+8,  18+8,  3+8,  19+8,
2545                                 2+12, 18+12, 3+12, 19+12);
2546}
2547
2548static __inline __m512 __DEFAULT_FN_ATTRS
2549_mm512_unpacklo_ps(__m512 __a, __m512 __b)
2550{
2551  return __builtin_shufflevector(__a, __b,
2552                                 0,    16,    1,    17,
2553                                 0+4,  16+4,  1+4,  17+4,
2554                                 0+8,  16+8,  1+8,  17+8,
2555                                 0+12, 16+12, 1+12, 17+12);
2556}
2557
2558/* Bit Test */
2559
2560static __inline __mmask16 __DEFAULT_FN_ATTRS
2561_mm512_test_epi32_mask(__m512i __A, __m512i __B)
2562{
2563  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
2564            (__v16si) __B,
2565            (__mmask16) -1);
2566}
2567
2568static __inline __mmask8 __DEFAULT_FN_ATTRS
2569_mm512_test_epi64_mask(__m512i __A, __m512i __B)
2570{
2571  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
2572                 (__v8di) __B,
2573                 (__mmask8) -1);
2574}
2575
2576/* SIMD load ops */
2577
2578static __inline __m512i __DEFAULT_FN_ATTRS
2579_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
2580{
2581  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
2582                                                     (__v16si)
2583                                                     _mm512_setzero_si512 (),
2584                                                     (__mmask16) __U);
2585}
2586
2587static __inline __m512i __DEFAULT_FN_ATTRS
2588_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
2589{
2590  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
2591                                                     (__v8di)
2592                                                     _mm512_setzero_si512 (),
2593                                                     (__mmask8) __U);
2594}
2595
2596static __inline __m512 __DEFAULT_FN_ATTRS
2597_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
2598{
2599  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
2600                                                  (__v16sf)
2601                                                  _mm512_setzero_ps (),
2602                                                  (__mmask16) __U);
2603}
2604
2605static __inline __m512d __DEFAULT_FN_ATTRS
2606_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
2607{
2608  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
2609                                                   (__v8df)
2610                                                   _mm512_setzero_pd (),
2611                                                   (__mmask8) __U);
2612}
2613
2614static __inline __m512 __DEFAULT_FN_ATTRS
2615_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
2616{
2617  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
2618                                                  (__v16sf)
2619                                                  _mm512_setzero_ps (),
2620                                                  (__mmask16) __U);
2621}
2622
2623static __inline __m512d __DEFAULT_FN_ATTRS
2624_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
2625{
2626  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
2627                                                   (__v8df)
2628                                                   _mm512_setzero_pd (),
2629                                                   (__mmask8) __U);
2630}
2631
2632static __inline __m512d __DEFAULT_FN_ATTRS
2633_mm512_loadu_pd(double const *__p)
2634{
2635  struct __loadu_pd {
2636    __m512d __v;
2637  } __attribute__((__packed__, __may_alias__));
2638  return ((struct __loadu_pd*)__p)->__v;
2639}
2640
2641static __inline __m512 __DEFAULT_FN_ATTRS
2642_mm512_loadu_ps(float const *__p)
2643{
2644  struct __loadu_ps {
2645    __m512 __v;
2646  } __attribute__((__packed__, __may_alias__));
2647  return ((struct __loadu_ps*)__p)->__v;
2648}
2649
2650static __inline __m512 __DEFAULT_FN_ATTRS
2651_mm512_load_ps(double const *__p)
2652{
2653  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
2654                                                  (__v16sf)
2655                                                  _mm512_setzero_ps (),
2656                                                  (__mmask16) -1);
2657}
2658
2659static __inline __m512d __DEFAULT_FN_ATTRS
2660_mm512_load_pd(float const *__p)
2661{
2662  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
2663                                                   (__v8df)
2664                                                   _mm512_setzero_pd (),
2665                                                   (__mmask8) -1);
2666}
2667
2668/* SIMD store ops */
2669
2670static __inline void __DEFAULT_FN_ATTRS
2671_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
2672{
2673  __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
2674                                     (__mmask8) __U);
2675}
2676
2677static __inline void __DEFAULT_FN_ATTRS
2678_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
2679{
2680  __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
2681                                     (__mmask16) __U);
2682}
2683
2684static __inline void __DEFAULT_FN_ATTRS
2685_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
2686{
2687  __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
2688}
2689
2690static __inline void __DEFAULT_FN_ATTRS
2691_mm512_storeu_pd(void *__P, __m512d __A)
2692{
2693  __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
2694}
2695
2696static __inline void __DEFAULT_FN_ATTRS
2697_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
2698{
2699  __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
2700                                   (__mmask16) __U);
2701}
2702
2703static __inline void __DEFAULT_FN_ATTRS
2704_mm512_storeu_ps(void *__P, __m512 __A)
2705{
2706  __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
2707}
2708
2709static __inline void __DEFAULT_FN_ATTRS
2710_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
2711{
2712  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
2713}
2714
2715static __inline void __DEFAULT_FN_ATTRS
2716_mm512_store_pd(void *__P, __m512d __A)
2717{
2718  *(__m512d*)__P = __A;
2719}
2720
2721static __inline void __DEFAULT_FN_ATTRS
2722_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
2723{
2724  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
2725                                   (__mmask16) __U);
2726}
2727
2728static __inline void __DEFAULT_FN_ATTRS
2729_mm512_store_ps(void *__P, __m512 __A)
2730{
2731  *(__m512*)__P = __A;
2732}
2733
2734/* Mask ops */
2735
2736static __inline __mmask16 __DEFAULT_FN_ATTRS
2737_mm512_knot(__mmask16 __M)
2738{
2739  return __builtin_ia32_knothi(__M);
2740}
2741
2742/* Integer compare */
2743
2744static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2745_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
2746  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
2747                                                   (__mmask16)-1);
2748}
2749
2750static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2751_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2752  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
2753                                                   __u);
2754}
2755
2756static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2757_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
2758  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
2759                                                 (__mmask16)-1);
2760}
2761
2762static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2763_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2764  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
2765                                                 __u);
2766}
2767
2768static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2769_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2770  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
2771                                                  __u);
2772}
2773
2774static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2775_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
2776  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
2777                                                  (__mmask8)-1);
2778}
2779
2780static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2781_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
2782  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
2783                                                (__mmask8)-1);
2784}
2785
2786static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2787_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2788  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
2789                                                __u);
2790}
2791
2792static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2793_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
2794  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2795                                                (__mmask16)-1);
2796}
2797
2798static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2799_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2800  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2801                                                __u);
2802}
2803
2804static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2805_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
2806  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2807                                                 (__mmask16)-1);
2808}
2809
2810static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2811_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2812  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2813                                                 __u);
2814}
2815
2816static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2817_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
2818  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2819                                               (__mmask8)-1);
2820}
2821
2822static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2823_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2824  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2825                                               __u);
2826}
2827
2828static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2829_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
2830  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2831                                                (__mmask8)-1);
2832}
2833
2834static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2835_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2836  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2837                                                __u);
2838}
2839
2840static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2841_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
2842  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
2843                                                   (__mmask16)-1);
2844}
2845
2846static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2847_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2848  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
2849                                                   __u);
2850}
2851
2852static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2853_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
2854  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
2855                                                 (__mmask16)-1);
2856}
2857
2858static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2859_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2860  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
2861                                                 __u);
2862}
2863
2864static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2865_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2866  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
2867                                                  __u);
2868}
2869
2870static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2871_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
2872  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
2873                                                  (__mmask8)-1);
2874}
2875
2876static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2877_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
2878  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
2879                                                (__mmask8)-1);
2880}
2881
2882static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2883_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2884  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
2885                                                __u);
2886}
2887
2888static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2889_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
2890  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2891                                                (__mmask16)-1);
2892}
2893
2894static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2895_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2896  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2897                                                __u);
2898}
2899
2900static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2901_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
2902  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2903                                                 (__mmask16)-1);
2904}
2905
2906static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2907_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2908  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2909                                                 __u);
2910}
2911
2912static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2913_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
2914  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2915                                               (__mmask8)-1);
2916}
2917
2918static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2919_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2920  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2921                                               __u);
2922}
2923
2924static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2925_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
2926  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2927                                                (__mmask8)-1);
2928}
2929
2930static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2931_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2932  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2933                                                __u);
2934}
2935
2936static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2937_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
2938  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2939                                                (__mmask16)-1);
2940}
2941
2942static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2943_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2944  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2945                                                __u);
2946}
2947
2948static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2949_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
2950  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2951                                                 (__mmask16)-1);
2952}
2953
2954static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2955_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2956  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2957                                                 __u);
2958}
2959
2960static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2961_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
2962  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2963                                               (__mmask8)-1);
2964}
2965
2966static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2967_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2968  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2969                                               __u);
2970}
2971
2972static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2973_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
2974  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2975                                                (__mmask8)-1);
2976}
2977
2978static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2979_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2980  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2981                                                __u);
2982}
2983
2984static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2985_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
2986  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
2987                                                (__mmask16)-1);
2988}
2989
2990static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2991_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2992  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
2993                                                __u);
2994}
2995
2996static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2997_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
2998  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
2999                                                 (__mmask16)-1);
3000}
3001
3002static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3003_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3004  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
3005                                                 __u);
3006}
3007
3008static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3009_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
3010  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3011                                               (__mmask8)-1);
3012}
3013
3014static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3015_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3016  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3017                                               __u);
3018}
3019
3020static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3021_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
3022  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3023                                                (__mmask8)-1);
3024}
3025
3026static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3027_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3028  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3029                                                __u);
3030}
3031
/* Generic unmasked compare: a and b (as 16 x int) and the predicate p go to
 * __builtin_ia32_cmpd512_mask with all 16 mask bits set. */
#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask( \
      (__v16si)(__m512i)(a), \
      (__v16si)(__m512i)(b), \
      (p), \
      (__mmask16)-1); })
3036
/* Generic unmasked compare: a and b (as 16 x int) and the predicate p go to
 * __builtin_ia32_ucmpd512_mask with all 16 mask bits set. */
#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask( \
      (__v16si)(__m512i)(a), \
      (__v16si)(__m512i)(b), \
      (p), \
      (__mmask16)-1); })
3041
/* Generic unmasked compare: a and b (as 8 x long long) and the predicate p go
 * to __builtin_ia32_cmpq512_mask with all 8 mask bits set. */
#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask( \
      (__v8di)(__m512i)(a), \
      (__v8di)(__m512i)(b), \
      (p), \
      (__mmask8)-1); })
3046
/* Generic unmasked compare: a and b (as 8 x long long) and the predicate p go
 * to __builtin_ia32_ucmpq512_mask with all 8 mask bits set. */
#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask( \
      (__v8di)(__m512i)(a), \
      (__v8di)(__m512i)(b), \
      (p), \
      (__mmask8)-1); })
3051
/* Generic masked compare: a and b (as 16 x int) and the predicate p go to
 * __builtin_ia32_cmpd512_mask with m as the mask operand. */
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask( \
      (__v16si)(__m512i)(a), \
      (__v16si)(__m512i)(b), \
      (p), \
      (__mmask16)(m)); })
3056
/* Generic masked compare: a and b (as 16 x int) and the predicate p go to
 * __builtin_ia32_ucmpd512_mask with m as the mask operand. */
#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask( \
      (__v16si)(__m512i)(a), \
      (__v16si)(__m512i)(b), \
      (p), \
      (__mmask16)(m)); })
3061
/* Generic masked compare: a and b (as 8 x long long) and the predicate p go
 * to __builtin_ia32_cmpq512_mask with m as the mask operand. */
#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask( \
      (__v8di)(__m512i)(a), \
      (__v8di)(__m512i)(b), \
      (p), \
      (__mmask8)(m)); })
3066
/* Generic masked compare: a and b (as 8 x long long) and the predicate p go
 * to __builtin_ia32_ucmpq512_mask with m as the mask operand. */
#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask( \
      (__v8di)(__m512i)(a), \
      (__v8di)(__m512i)(b), \
      (p), \
      (__mmask8)(m)); })
3071
3072#undef __DEFAULT_FN_ATTRS
3073
3074#endif // __AVX512FINTRIN_H
3075