1/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2
3   This file is part of GCC.
4
5   GCC is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 3, or (at your option)
8   any later version.
9
10   GCC is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   Under Section 7 of GPL version 3, you are granted additional
16   permissions described in the GCC Runtime Library Exception, version
17   3.1, as published by the Free Software Foundation.
18
19   You should have received a copy of the GNU General Public License and
20   a copy of the GCC Runtime Library Exception along with this program;
21   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22   <http://www.gnu.org/licenses/>.  */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512FINTRIN_H_INCLUDED
29#define _AVX512FINTRIN_H_INCLUDED
30
31#ifndef __AVX512F__
32#pragma GCC push_options
33#pragma GCC target("avx512f")
34#define __DISABLE_AVX512F__
35#endif /* __AVX512F__ */
36
/* Internal data types for implementing the intrinsics.  Each one is a
   64-byte GCC vector; the element type determines the lane count
   (the counts in the names assume the usual x86 scalar sizes).  */

/* Floating-point lanes.  */
typedef double __v8df
  __attribute__ ((__vector_size__ (64)));
typedef float __v16sf
  __attribute__ ((__vector_size__ (64)));

/* 64-bit integer lanes, signed then unsigned.  */
typedef long long __v8di
  __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du
  __attribute__ ((__vector_size__ (64)));

/* 32-bit integer lanes, signed then unsigned.  */
typedef int __v16si
  __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su
  __attribute__ ((__vector_size__ (64)));

/* 16-bit integer lanes, signed then unsigned.  */
typedef short __v32hi
  __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu
  __attribute__ ((__vector_size__ (64)));

/* Byte lanes: plain char, then unsigned char.  */
typedef char __v64qi
  __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu
  __attribute__ ((__vector_size__ (64)));
48
/* User-visible 512-bit vector types.  The Intel API is flexible enough
   that these must be allowed to alias other vector types and their
   scalar components, hence __may_alias__ on each of them.  */
typedef float __m512
  __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i
  __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d
  __attribute__ ((__vector_size__ (64), __may_alias__));
54
/* Mask types passed as the mask operand of the masked intrinsics.  */
typedef unsigned char __mmask8;		/* Used with the 8-lane forms.  */
typedef unsigned short __mmask16;	/* Used with the 16-lane forms.  */
57
58extern __inline __m512i
59__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
60_mm512_set_epi64 (long long __A, long long __B, long long __C,
61		  long long __D, long long __E, long long __F,
62		  long long __G, long long __H)
63{
64  return __extension__ (__m512i) (__v8di)
65	 { __H, __G, __F, __E, __D, __C, __B, __A };
66}
67
68/* Create the vector [A B C D E F G H I J K L M N O P].  */
69extern __inline __m512i
70__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
71_mm512_set_epi32 (int __A, int __B, int __C, int __D,
72		  int __E, int __F, int __G, int __H,
73		  int __I, int __J, int __K, int __L,
74		  int __M, int __N, int __O, int __P)
75{
76  return __extension__ (__m512i)(__v16si)
77	 { __P, __O, __N, __M, __L, __K, __J, __I,
78	   __H, __G, __F, __E, __D, __C, __B, __A };
79}
80
81extern __inline __m512d
82__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
83_mm512_set_pd (double __A, double __B, double __C, double __D,
84	       double __E, double __F, double __G, double __H)
85{
86  return __extension__ (__m512d)
87	 { __H, __G, __F, __E, __D, __C, __B, __A };
88}
89
90extern __inline __m512
91__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
92_mm512_set_ps (float __A, float __B, float __C, float __D,
93	       float __E, float __F, float __G, float __H,
94	       float __I, float __J, float __K, float __L,
95	       float __M, float __N, float __O, float __P)
96{
97  return __extension__ (__m512)
98	 { __P, __O, __N, __M, __L, __K, __J, __I,
99	   __H, __G, __F, __E, __D, __C, __B, __A };
100}
101
/* "Reversed" setters: the arguments are given from element 0 upwards and
   are handed to the matching _mm512_set_* function in the opposite
   order.  */
#define _mm512_setr_epi64(x0, x1, x2, x3, x4, x5, x6, x7)                  \
  _mm512_set_epi64 ((x7), (x6), (x5), (x4), (x3), (x2), (x1), (x0))

#define _mm512_setr_epi32(x0, x1, x2, x3, x4, x5, x6, x7,                  \
                          x8, x9, x10, x11, x12, x13, x14, x15)            \
  _mm512_set_epi32 ((x15), (x14), (x13), (x12), (x11), (x10), (x9), (x8),  \
                    (x7), (x6), (x5), (x4), (x3), (x2), (x1), (x0))

#define _mm512_setr_pd(x0, x1, x2, x3, x4, x5, x6, x7)                     \
  _mm512_set_pd ((x7), (x6), (x5), (x4), (x3), (x2), (x1), (x0))

#define _mm512_setr_ps(x0, x1, x2, x3, x4, x5, x6, x7,                     \
                       x8, x9, x10, x11, x12, x13, x14, x15)               \
  _mm512_set_ps ((x15), (x14), (x13), (x12), (x11), (x10), (x9), (x8),     \
                 (x7), (x6), (x5), (x4), (x3), (x2), (x1), (x0))
114
115extern __inline __m512
116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
117_mm512_undefined_ps (void)
118{
119  __m512 __Y = __Y;
120  return __Y;
121}
122
123extern __inline __m512d
124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
125_mm512_undefined_pd (void)
126{
127  __m512d __Y = __Y;
128  return __Y;
129}
130
131extern __inline __m512i
132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
133_mm512_undefined_si512 (void)
134{
135  __m512i __Y = __Y;
136  return __Y;
137}
138
139extern __inline __m512i
140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
141_mm512_set1_epi8 (char __A)
142{
143  return __extension__ (__m512i)(__v64qi)
144	 { __A, __A, __A, __A, __A, __A, __A, __A,
145	   __A, __A, __A, __A, __A, __A, __A, __A,
146	   __A, __A, __A, __A, __A, __A, __A, __A,
147	   __A, __A, __A, __A, __A, __A, __A, __A,
148	   __A, __A, __A, __A, __A, __A, __A, __A,
149	   __A, __A, __A, __A, __A, __A, __A, __A,
150	   __A, __A, __A, __A, __A, __A, __A, __A,
151	   __A, __A, __A, __A, __A, __A, __A, __A };
152}
153
154extern __inline __m512i
155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
156_mm512_set1_epi16 (short __A)
157{
158  return __extension__ (__m512i)(__v32hi)
159	 { __A, __A, __A, __A, __A, __A, __A, __A,
160	   __A, __A, __A, __A, __A, __A, __A, __A,
161	   __A, __A, __A, __A, __A, __A, __A, __A,
162	   __A, __A, __A, __A, __A, __A, __A, __A };
163}
164
165extern __inline __m512d
166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
167_mm512_set1_pd (double __A)
168{
169  return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
170						  (__v2df) { __A, },
171						  (__v8df)
172						  _mm512_undefined_pd (),
173						  (__mmask8) -1);
174}
175
176extern __inline __m512
177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
178_mm512_set1_ps (float __A)
179{
180  return (__m512) __builtin_ia32_broadcastss512 (__extension__
181						 (__v4sf) { __A, },
182						 (__v16sf)
183						 _mm512_undefined_ps (),
184						 (__mmask16) -1);
185}
186
187/* Create the vector [A B C D A B C D A B C D A B C D].  */
188extern __inline __m512i
189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
190_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
191{
192  return __extension__ (__m512i)(__v16si)
193	 { __D, __C, __B, __A, __D, __C, __B, __A,
194	   __D, __C, __B, __A, __D, __C, __B, __A };
195}
196
197extern __inline __m512i
198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
199_mm512_set4_epi64 (long long __A, long long __B, long long __C,
200		   long long __D)
201{
202  return __extension__ (__m512i) (__v8di)
203	 { __D, __C, __B, __A, __D, __C, __B, __A };
204}
205
206extern __inline __m512d
207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
208_mm512_set4_pd (double __A, double __B, double __C, double __D)
209{
210  return __extension__ (__m512d)
211	 { __D, __C, __B, __A, __D, __C, __B, __A };
212}
213
214extern __inline __m512
215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
216_mm512_set4_ps (float __A, float __B, float __C, float __D)
217{
218  return __extension__ (__m512)
219	 { __D, __C, __B, __A, __D, __C, __B, __A,
220	   __D, __C, __B, __A, __D, __C, __B, __A };
221}
222
/* "Reversed" four-value setters: the arguments are handed to the matching
   _mm512_set4_* function in the opposite order.  */
#define _mm512_setr4_epi64(x0, x1, x2, x3)                                 \
  _mm512_set4_epi64 ((x3), (x2), (x1), (x0))

#define _mm512_setr4_epi32(x0, x1, x2, x3)                                 \
  _mm512_set4_epi32 ((x3), (x2), (x1), (x0))

#define _mm512_setr4_pd(x0, x1, x2, x3)                                    \
  _mm512_set4_pd ((x3), (x2), (x1), (x0))

#define _mm512_setr4_ps(x0, x1, x2, x3)                                    \
  _mm512_set4_ps ((x3), (x2), (x1), (x0))
234
235extern __inline __m512
236__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
237_mm512_setzero_ps (void)
238{
239  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
240				 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
241}
242
243extern __inline __m512d
244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
245_mm512_setzero_pd (void)
246{
247  return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
248}
249
250extern __inline __m512i
251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
252_mm512_setzero_epi32 (void)
253{
254  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
255}
256
257extern __inline __m512i
258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
259_mm512_setzero_si512 (void)
260{
261  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
262}
263
264extern __inline __m512d
265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
266_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
267{
268  return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
269						  (__v8df) __W,
270						  (__mmask8) __U);
271}
272
273extern __inline __m512d
274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
275_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
276{
277  return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
278						  (__v8df)
279						  _mm512_setzero_pd (),
280						  (__mmask8) __U);
281}
282
283extern __inline __m512
284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
285_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
286{
287  return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
288						 (__v16sf) __W,
289						 (__mmask16) __U);
290}
291
292extern __inline __m512
293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
294_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
295{
296  return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
297						 (__v16sf)
298						 _mm512_setzero_ps (),
299						 (__mmask16) __U);
300}
301
302extern __inline __m512d
303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
304_mm512_load_pd (void const *__P)
305{
306  return *(__m512d *) __P;
307}
308
309extern __inline __m512d
310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
311_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
312{
313  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
314						   (__v8df) __W,
315						   (__mmask8) __U);
316}
317
318extern __inline __m512d
319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
320_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
321{
322  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
323						   (__v8df)
324						   _mm512_setzero_pd (),
325						   (__mmask8) __U);
326}
327
328extern __inline void
329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
330_mm512_store_pd (void *__P, __m512d __A)
331{
332  *(__m512d *) __P = __A;
333}
334
335extern __inline void
336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
338{
339  __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
340				   (__mmask8) __U);
341}
342
343extern __inline __m512
344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
345_mm512_load_ps (void const *__P)
346{
347  return *(__m512 *) __P;
348}
349
350extern __inline __m512
351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
352_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
353{
354  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
355						  (__v16sf) __W,
356						  (__mmask16) __U);
357}
358
359extern __inline __m512
360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
361_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
362{
363  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
364						  (__v16sf)
365						  _mm512_setzero_ps (),
366						  (__mmask16) __U);
367}
368
369extern __inline void
370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
371_mm512_store_ps (void *__P, __m512 __A)
372{
373  *(__m512 *) __P = __A;
374}
375
376extern __inline void
377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
378_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
379{
380  __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
381				   (__mmask16) __U);
382}
383
384extern __inline __m512i
385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
386_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
387{
388  return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
389						     (__v8di) __W,
390						     (__mmask8) __U);
391}
392
393extern __inline __m512i
394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
396{
397  return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
398						     (__v8di)
399						     _mm512_setzero_si512 (),
400						     (__mmask8) __U);
401}
402
403extern __inline __m512i
404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
405_mm512_load_epi64 (void const *__P)
406{
407  return *(__m512i *) __P;
408}
409
410extern __inline __m512i
411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
412_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
413{
414  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
415							(__v8di) __W,
416							(__mmask8) __U);
417}
418
419extern __inline __m512i
420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
421_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
422{
423  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
424							(__v8di)
425							_mm512_setzero_si512 (),
426							(__mmask8) __U);
427}
428
429extern __inline void
430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
431_mm512_store_epi64 (void *__P, __m512i __A)
432{
433  *(__m512i *) __P = __A;
434}
435
436extern __inline void
437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
438_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
439{
440  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
441					(__mmask8) __U);
442}
443
444extern __inline __m512i
445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
446_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
447{
448  return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
449						     (__v16si) __W,
450						     (__mmask16) __U);
451}
452
453extern __inline __m512i
454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
455_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
456{
457  return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
458						     (__v16si)
459						     _mm512_setzero_si512 (),
460						     (__mmask16) __U);
461}
462
463extern __inline __m512i
464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
465_mm512_load_si512 (void const *__P)
466{
467  return *(__m512i *) __P;
468}
469
470extern __inline __m512i
471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
472_mm512_load_epi32 (void const *__P)
473{
474  return *(__m512i *) __P;
475}
476
477extern __inline __m512i
478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
479_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
480{
481  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
482							(__v16si) __W,
483							(__mmask16) __U);
484}
485
486extern __inline __m512i
487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
489{
490  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
491							(__v16si)
492							_mm512_setzero_si512 (),
493							(__mmask16) __U);
494}
495
496extern __inline void
497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
498_mm512_store_si512 (void *__P, __m512i __A)
499{
500  *(__m512i *) __P = __A;
501}
502
503extern __inline void
504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
505_mm512_store_epi32 (void *__P, __m512i __A)
506{
507  *(__m512i *) __P = __A;
508}
509
510extern __inline void
511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
512_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
513{
514  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
515					(__mmask16) __U);
516}
517
518extern __inline __m512i
519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
520_mm512_mullo_epi32 (__m512i __A, __m512i __B)
521{
522  return (__m512i) ((__v16su) __A * (__v16su) __B);
523}
524
525extern __inline __m512i
526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
527_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
528{
529  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
530						  (__v16si) __B,
531						  (__v16si)
532						  _mm512_setzero_si512 (),
533						  __M);
534}
535
536extern __inline __m512i
537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
538_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
539{
540  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
541						  (__v16si) __B,
542						  (__v16si) __W, __M);
543}
544
545extern __inline __m512i
546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
547_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
548{
549  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
550						  (__v16si) __Y,
551						  (__v16si)
552						  _mm512_undefined_si512 (),
553						  (__mmask16) -1);
554}
555
556extern __inline __m512i
557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
558_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
559{
560  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
561						  (__v16si) __Y,
562						  (__v16si) __W,
563						  (__mmask16) __U);
564}
565
566extern __inline __m512i
567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
568_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
569{
570  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
571						  (__v16si) __Y,
572						  (__v16si)
573						  _mm512_setzero_si512 (),
574						  (__mmask16) __U);
575}
576
577extern __inline __m512i
578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
579_mm512_srav_epi32 (__m512i __X, __m512i __Y)
580{
581  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
582						  (__v16si) __Y,
583						  (__v16si)
584						  _mm512_undefined_si512 (),
585						  (__mmask16) -1);
586}
587
588extern __inline __m512i
589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
590_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
591{
592  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
593						  (__v16si) __Y,
594						  (__v16si) __W,
595						  (__mmask16) __U);
596}
597
598extern __inline __m512i
599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
600_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
601{
602  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
603						  (__v16si) __Y,
604						  (__v16si)
605						  _mm512_setzero_si512 (),
606						  (__mmask16) __U);
607}
608
609extern __inline __m512i
610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
611_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
612{
613  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
614						  (__v16si) __Y,
615						  (__v16si)
616						  _mm512_undefined_si512 (),
617						  (__mmask16) -1);
618}
619
620extern __inline __m512i
621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
622_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
623{
624  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
625						  (__v16si) __Y,
626						  (__v16si) __W,
627						  (__mmask16) __U);
628}
629
630extern __inline __m512i
631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
632_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
633{
634  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
635						  (__v16si) __Y,
636						  (__v16si)
637						  _mm512_setzero_si512 (),
638						  (__mmask16) __U);
639}
640
641extern __inline __m512i
642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
643_mm512_add_epi64 (__m512i __A, __m512i __B)
644{
645  return (__m512i) ((__v8du) __A + (__v8du) __B);
646}
647
648extern __inline __m512i
649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
650_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
651{
652  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
653						 (__v8di) __B,
654						 (__v8di) __W,
655						 (__mmask8) __U);
656}
657
658extern __inline __m512i
659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
660_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
661{
662  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
663						 (__v8di) __B,
664						 (__v8di)
665						 _mm512_setzero_si512 (),
666						 (__mmask8) __U);
667}
668
669extern __inline __m512i
670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
671_mm512_sub_epi64 (__m512i __A, __m512i __B)
672{
673  return (__m512i) ((__v8du) __A - (__v8du) __B);
674}
675
676extern __inline __m512i
677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
678_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
679{
680  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
681						 (__v8di) __B,
682						 (__v8di) __W,
683						 (__mmask8) __U);
684}
685
686extern __inline __m512i
687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
688_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
689{
690  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
691						 (__v8di) __B,
692						 (__v8di)
693						 _mm512_setzero_si512 (),
694						 (__mmask8) __U);
695}
696
697extern __inline __m512i
698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
699_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
700{
701  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
702						 (__v8di) __Y,
703						 (__v8di)
704						 _mm512_undefined_pd (),
705						 (__mmask8) -1);
706}
707
708extern __inline __m512i
709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
710_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
711{
712  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
713						 (__v8di) __Y,
714						 (__v8di) __W,
715						 (__mmask8) __U);
716}
717
718extern __inline __m512i
719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
720_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
721{
722  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
723						 (__v8di) __Y,
724						 (__v8di)
725						 _mm512_setzero_si512 (),
726						 (__mmask8) __U);
727}
728
729extern __inline __m512i
730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
731_mm512_srav_epi64 (__m512i __X, __m512i __Y)
732{
733  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
734						 (__v8di) __Y,
735						 (__v8di)
736						 _mm512_undefined_si512 (),
737						 (__mmask8) -1);
738}
739
740extern __inline __m512i
741__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
742_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
743{
744  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
745						 (__v8di) __Y,
746						 (__v8di) __W,
747						 (__mmask8) __U);
748}
749
750extern __inline __m512i
751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
752_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
753{
754  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
755						 (__v8di) __Y,
756						 (__v8di)
757						 _mm512_setzero_si512 (),
758						 (__mmask8) __U);
759}
760
761extern __inline __m512i
762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
763_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
764{
765  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
766						 (__v8di) __Y,
767						 (__v8di)
768						 _mm512_undefined_si512 (),
769						 (__mmask8) -1);
770}
771
772extern __inline __m512i
773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
774_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
775{
776  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
777						 (__v8di) __Y,
778						 (__v8di) __W,
779						 (__mmask8) __U);
780}
781
782extern __inline __m512i
783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
784_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
785{
786  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
787						 (__v8di) __Y,
788						 (__v8di)
789						 _mm512_setzero_si512 (),
790						 (__mmask8) __U);
791}
792
793extern __inline __m512i
794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
795_mm512_add_epi32 (__m512i __A, __m512i __B)
796{
797  return (__m512i) ((__v16su) __A + (__v16su) __B);
798}
799
800extern __inline __m512i
801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
802_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
803{
804  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
805						 (__v16si) __B,
806						 (__v16si) __W,
807						 (__mmask16) __U);
808}
809
810extern __inline __m512i
811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
812_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
813{
814  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
815						 (__v16si) __B,
816						 (__v16si)
817						 _mm512_setzero_si512 (),
818						 (__mmask16) __U);
819}
820
821extern __inline __m512i
822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
823_mm512_mul_epi32 (__m512i __X, __m512i __Y)
824{
825  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
826						  (__v16si) __Y,
827						  (__v8di)
828						  _mm512_undefined_si512 (),
829						  (__mmask8) -1);
830}
831
832extern __inline __m512i
833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
834_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
835{
836  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
837						  (__v16si) __Y,
838						  (__v8di) __W, __M);
839}
840
841extern __inline __m512i
842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
843_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
844{
845  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
846						  (__v16si) __Y,
847						  (__v8di)
848						  _mm512_setzero_si512 (),
849						  __M);
850}
851
852extern __inline __m512i
853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
854_mm512_sub_epi32 (__m512i __A, __m512i __B)
855{
856  return (__m512i) ((__v16su) __A - (__v16su) __B);
857}
858
859extern __inline __m512i
860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
861_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
862{
863  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
864						 (__v16si) __B,
865						 (__v16si) __W,
866						 (__mmask16) __U);
867}
868
869extern __inline __m512i
870__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
871_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
872{
873  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
874						 (__v16si) __B,
875						 (__v16si)
876						 _mm512_setzero_si512 (),
877						 (__mmask16) __U);
878}
879
880extern __inline __m512i
881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882_mm512_mul_epu32 (__m512i __X, __m512i __Y)
883{
884  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
885						   (__v16si) __Y,
886						   (__v8di)
887						   _mm512_undefined_si512 (),
888						   (__mmask8) -1);
889}
890
891extern __inline __m512i
892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
893_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
894{
895  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
896						   (__v16si) __Y,
897						   (__v8di) __W, __M);
898}
899
900extern __inline __m512i
901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
902_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
903{
904  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
905						   (__v16si) __Y,
906						   (__v8di)
907						   _mm512_setzero_si512 (),
908						   __M);
909}
910
911#ifdef __OPTIMIZE__
912extern __inline __m512i
913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
914_mm512_slli_epi64 (__m512i __A, unsigned int __B)
915{
916  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
917						  (__v8di)
918						  _mm512_undefined_si512 (),
919						  (__mmask8) -1);
920}
921
922extern __inline __m512i
923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
924_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
925			unsigned int __B)
926{
927  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
928						  (__v8di) __W,
929						  (__mmask8) __U);
930}
931
932extern __inline __m512i
933__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
934_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
935{
936  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
937						  (__v8di)
938						  _mm512_setzero_si512 (),
939						  (__mmask8) __U);
940}
941#else
/* Macro forms of the slli_epi64 intrinsics, used when __OPTIMIZE__ is not
   defined.  Each expands to the same __builtin_ia32_psllqi512_mask call
   as the inline function of the same name; the count is cast to int.  */
#define _mm512_slli_epi64(V, N)                                            \
  ((__m512i) __builtin_ia32_psllqi512_mask (                               \
     (__v8di)(__m512i)(V), (int)(N),                                       \
     (__v8di)(__m512i)_mm512_undefined_si512 (),                           \
     (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, V, N)                                 \
  ((__m512i) __builtin_ia32_psllqi512_mask (                               \
     (__v8di)(__m512i)(V), (int)(N),                                       \
     (__v8di)(__m512i)(W),                                                 \
     (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, V, N)                                   \
  ((__m512i) __builtin_ia32_psllqi512_mask (                               \
     (__v8di)(__m512i)(V), (int)(N),                                       \
     (__v8di)(__m512i)_mm512_setzero_si512 (),                             \
     (__mmask8)(U)))
956#endif
957
958extern __inline __m512i
959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
960_mm512_sll_epi64 (__m512i __A, __m128i __B)
961{
962  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
963						 (__v2di) __B,
964						 (__v8di)
965						 _mm512_undefined_si512 (),
966						 (__mmask8) -1);
967}
968
969extern __inline __m512i
970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
971_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
972{
973  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
974						 (__v2di) __B,
975						 (__v8di) __W,
976						 (__mmask8) __U);
977}
978
979extern __inline __m512i
980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
981_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
982{
983  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
984						 (__v2di) __B,
985						 (__v8di)
986						 _mm512_setzero_si512 (),
987						 (__mmask8) __U);
988}
989
990#ifdef __OPTIMIZE__
991extern __inline __m512i
992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
993_mm512_srli_epi64 (__m512i __A, unsigned int __B)
994{
995  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
996						  (__v8di)
997						  _mm512_undefined_si512 (),
998						  (__mmask8) -1);
999}
1000
1001extern __inline __m512i
1002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1003_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
1004			__m512i __A, unsigned int __B)
1005{
1006  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1007						  (__v8di) __W,
1008						  (__mmask8) __U);
1009}
1010
1011extern __inline __m512i
1012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1013_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1014{
1015  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1016						  (__v8di)
1017						  _mm512_setzero_si512 (),
1018						  (__mmask8) __U);
1019}
1020#else
/* Macro spellings of the three _mm512_*srli_epi64 wrappers, used on the
   #else side of the __OPTIMIZE__ test.  Each expands to one
   __builtin_ia32_psrlqi512_mask call.  */
#define _mm512_srli_epi64(SRC, COUNT) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) (__m512i) (SRC), \
    (int) (COUNT), (__v8di) (__m512i) _mm512_undefined_si512 (), \
    (__mmask8) -1))

#define _mm512_mask_srli_epi64(FILL, MASK, SRC, COUNT) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) (__m512i) (SRC), \
    (int) (COUNT), (__v8di) (__m512i) (FILL), (__mmask8) (MASK)))

#define _mm512_maskz_srli_epi64(MASK, SRC, COUNT) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) (__m512i) (SRC), \
    (int) (COUNT), (__v8di) (__m512i) _mm512_setzero_si512 (), \
    (__mmask8) (MASK)))
1035#endif
1036
1037extern __inline __m512i
1038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1039_mm512_srl_epi64 (__m512i __A, __m128i __B)
1040{
1041  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1042						 (__v2di) __B,
1043						 (__v8di)
1044						 _mm512_undefined_si512 (),
1045						 (__mmask8) -1);
1046}
1047
1048extern __inline __m512i
1049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1050_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1051{
1052  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1053						 (__v2di) __B,
1054						 (__v8di) __W,
1055						 (__mmask8) __U);
1056}
1057
1058extern __inline __m512i
1059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1060_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1061{
1062  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1063						 (__v2di) __B,
1064						 (__v8di)
1065						 _mm512_setzero_si512 (),
1066						 (__mmask8) __U);
1067}
1068
1069#ifdef __OPTIMIZE__
1070extern __inline __m512i
1071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1072_mm512_srai_epi64 (__m512i __A, unsigned int __B)
1073{
1074  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1075						  (__v8di)
1076						  _mm512_undefined_si512 (),
1077						  (__mmask8) -1);
1078}
1079
1080extern __inline __m512i
1081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
1083			unsigned int __B)
1084{
1085  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1086						  (__v8di) __W,
1087						  (__mmask8) __U);
1088}
1089
1090extern __inline __m512i
1091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1092_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1093{
1094  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1095						  (__v8di)
1096						  _mm512_setzero_si512 (),
1097						  (__mmask8) __U);
1098}
1099#else
/* Macro spellings of the three _mm512_*srai_epi64 wrappers, used on the
   #else side of the __OPTIMIZE__ test.  Each expands to one
   __builtin_ia32_psraqi512_mask call.  */
#define _mm512_srai_epi64(SRC, COUNT) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di) (__m512i) (SRC), \
    (int) (COUNT), (__v8di) (__m512i) _mm512_undefined_si512 (), \
    (__mmask8) -1))

#define _mm512_mask_srai_epi64(FILL, MASK, SRC, COUNT) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di) (__m512i) (SRC), \
    (int) (COUNT), (__v8di) (__m512i) (FILL), (__mmask8) (MASK)))

#define _mm512_maskz_srai_epi64(MASK, SRC, COUNT) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di) (__m512i) (SRC), \
    (int) (COUNT), (__v8di) (__m512i) _mm512_setzero_si512 (), \
    (__mmask8) (MASK)))
1114#endif
1115
1116extern __inline __m512i
1117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1118_mm512_sra_epi64 (__m512i __A, __m128i __B)
1119{
1120  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1121						 (__v2di) __B,
1122						 (__v8di)
1123						 _mm512_undefined_si512 (),
1124						 (__mmask8) -1);
1125}
1126
1127extern __inline __m512i
1128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1129_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1130{
1131  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1132						 (__v2di) __B,
1133						 (__v8di) __W,
1134						 (__mmask8) __U);
1135}
1136
1137extern __inline __m512i
1138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1139_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1140{
1141  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1142						 (__v2di) __B,
1143						 (__v8di)
1144						 _mm512_setzero_si512 (),
1145						 (__mmask8) __U);
1146}
1147
1148#ifdef __OPTIMIZE__
1149extern __inline __m512i
1150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1151_mm512_slli_epi32 (__m512i __A, unsigned int __B)
1152{
1153  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1154						  (__v16si)
1155						  _mm512_undefined_si512 (),
1156						  (__mmask16) -1);
1157}
1158
1159extern __inline __m512i
1160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1161_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1162			unsigned int __B)
1163{
1164  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1165						  (__v16si) __W,
1166						  (__mmask16) __U);
1167}
1168
1169extern __inline __m512i
1170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1171_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1172{
1173  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1174						  (__v16si)
1175						  _mm512_setzero_si512 (),
1176						  (__mmask16) __U);
1177}
1178#else
/* Macro spellings of the three _mm512_*slli_epi32 wrappers, used on the
   #else side of the __OPTIMIZE__ test.  Each expands to one
   __builtin_ia32_pslldi512_mask call.  */
#define _mm512_slli_epi32(SRC, COUNT) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si) (__m512i) (SRC), \
    (int) (COUNT), (__v16si) (__m512i) _mm512_undefined_si512 (), \
    (__mmask16) -1))

#define _mm512_mask_slli_epi32(FILL, MASK, SRC, COUNT) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si) (__m512i) (SRC), \
    (int) (COUNT), (__v16si) (__m512i) (FILL), (__mmask16) (MASK)))

#define _mm512_maskz_slli_epi32(MASK, SRC, COUNT) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si) (__m512i) (SRC), \
    (int) (COUNT), (__v16si) (__m512i) _mm512_setzero_si512 (), \
    (__mmask16) (MASK)))
1193#endif
1194
1195extern __inline __m512i
1196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1197_mm512_sll_epi32 (__m512i __A, __m128i __B)
1198{
1199  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1200						 (__v4si) __B,
1201						 (__v16si)
1202						 _mm512_undefined_si512 (),
1203						 (__mmask16) -1);
1204}
1205
1206extern __inline __m512i
1207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1208_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1209{
1210  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1211						 (__v4si) __B,
1212						 (__v16si) __W,
1213						 (__mmask16) __U);
1214}
1215
1216extern __inline __m512i
1217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1218_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1219{
1220  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1221						 (__v4si) __B,
1222						 (__v16si)
1223						 _mm512_setzero_si512 (),
1224						 (__mmask16) __U);
1225}
1226
1227#ifdef __OPTIMIZE__
1228extern __inline __m512i
1229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1230_mm512_srli_epi32 (__m512i __A, unsigned int __B)
1231{
1232  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1233						  (__v16si)
1234						  _mm512_undefined_si512 (),
1235						  (__mmask16) -1);
1236}
1237
1238extern __inline __m512i
1239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1240_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1241			__m512i __A, unsigned int __B)
1242{
1243  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1244						  (__v16si) __W,
1245						  (__mmask16) __U);
1246}
1247
1248extern __inline __m512i
1249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1250_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1251{
1252  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1253						  (__v16si)
1254						  _mm512_setzero_si512 (),
1255						  (__mmask16) __U);
1256}
1257#else
/* Macro spellings of the three _mm512_*srli_epi32 wrappers, used on the
   #else side of the __OPTIMIZE__ test.  Each expands to one
   __builtin_ia32_psrldi512_mask call.  */
#define _mm512_srli_epi32(SRC, COUNT) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si) (__m512i) (SRC), \
    (int) (COUNT), (__v16si) (__m512i) _mm512_undefined_si512 (), \
    (__mmask16) -1))

#define _mm512_mask_srli_epi32(FILL, MASK, SRC, COUNT) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si) (__m512i) (SRC), \
    (int) (COUNT), (__v16si) (__m512i) (FILL), (__mmask16) (MASK)))

#define _mm512_maskz_srli_epi32(MASK, SRC, COUNT) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si) (__m512i) (SRC), \
    (int) (COUNT), (__v16si) (__m512i) _mm512_setzero_si512 (), \
    (__mmask16) (MASK)))
1272#endif
1273
1274extern __inline __m512i
1275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1276_mm512_srl_epi32 (__m512i __A, __m128i __B)
1277{
1278  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1279						 (__v4si) __B,
1280						 (__v16si)
1281						 _mm512_undefined_si512 (),
1282						 (__mmask16) -1);
1283}
1284
1285extern __inline __m512i
1286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1287_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1288{
1289  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1290						 (__v4si) __B,
1291						 (__v16si) __W,
1292						 (__mmask16) __U);
1293}
1294
1295extern __inline __m512i
1296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1297_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1298{
1299  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1300						 (__v4si) __B,
1301						 (__v16si)
1302						 _mm512_setzero_si512 (),
1303						 (__mmask16) __U);
1304}
1305
1306#ifdef __OPTIMIZE__
1307extern __inline __m512i
1308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1309_mm512_srai_epi32 (__m512i __A, unsigned int __B)
1310{
1311  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1312						  (__v16si)
1313						  _mm512_undefined_si512 (),
1314						  (__mmask16) -1);
1315}
1316
1317extern __inline __m512i
1318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1319_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1320			unsigned int __B)
1321{
1322  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1323						  (__v16si) __W,
1324						  (__mmask16) __U);
1325}
1326
1327extern __inline __m512i
1328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1329_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1330{
1331  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1332						  (__v16si)
1333						  _mm512_setzero_si512 (),
1334						  (__mmask16) __U);
1335}
1336#else
/* Macro spellings of the three _mm512_*srai_epi32 wrappers, used on the
   #else side of the __OPTIMIZE__ test.  Each expands to one
   __builtin_ia32_psradi512_mask call.  */
#define _mm512_srai_epi32(SRC, COUNT) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si) (__m512i) (SRC), \
    (int) (COUNT), (__v16si) (__m512i) _mm512_undefined_si512 (), \
    (__mmask16) -1))

#define _mm512_mask_srai_epi32(FILL, MASK, SRC, COUNT) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si) (__m512i) (SRC), \
    (int) (COUNT), (__v16si) (__m512i) (FILL), (__mmask16) (MASK)))

#define _mm512_maskz_srai_epi32(MASK, SRC, COUNT) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si) (__m512i) (SRC), \
    (int) (COUNT), (__v16si) (__m512i) _mm512_setzero_si512 (), \
    (__mmask16) (MASK)))
1351#endif
1352
1353extern __inline __m512i
1354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1355_mm512_sra_epi32 (__m512i __A, __m128i __B)
1356{
1357  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1358						 (__v4si) __B,
1359						 (__v16si)
1360						 _mm512_undefined_si512 (),
1361						 (__mmask16) -1);
1362}
1363
1364extern __inline __m512i
1365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1366_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1367{
1368  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1369						 (__v4si) __B,
1370						 (__v16si) __W,
1371						 (__mmask16) __U);
1372}
1373
1374extern __inline __m512i
1375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1376_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1377{
1378  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1379						 (__v4si) __B,
1380						 (__v16si)
1381						 _mm512_setzero_si512 (),
1382						 (__mmask16) __U);
1383}
1384
1385#ifdef __OPTIMIZE__
1386extern __inline __m128d
1387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1388_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1389{
1390  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1391					       (__v2df) __B,
1392					       __R);
1393}
1394
1395extern __inline __m128
1396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1397_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1398{
1399  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1400					      (__v4sf) __B,
1401					      __R);
1402}
1403
1404extern __inline __m128d
1405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1406_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1407{
1408  return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1409					       (__v2df) __B,
1410					       __R);
1411}
1412
1413extern __inline __m128
1414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1415_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1416{
1417  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1418					      (__v4sf) __B,
1419					      __R);
1420}
1421
1422#else
/* Macro spellings of the scalar add/sub rounding wrappers for the #else
   side of the __OPTIMIZE__ test.  Each expansion is fully parenthesized
   and casts its operands the same way the inline versions do, so the
   macro can be used anywhere the function call could (for example
   followed by a subscript).  A and B are the two vector operands; C is
   the int passed as the builtin's last argument.  */
#define _mm_add_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_addsd_round ((__v2df) (__m128d) (A), \
    (__v2df) (__m128d) (B), (int) (C)))

#define _mm_add_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_addss_round ((__v4sf) (__m128) (A), \
    (__v4sf) (__m128) (B), (int) (C)))

#define _mm_sub_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_subsd_round ((__v2df) (__m128d) (A), \
    (__v2df) (__m128d) (B), (int) (C)))

#define _mm_sub_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_subss_round ((__v4sf) (__m128) (A), \
    (__v4sf) (__m128) (B), (int) (C)))
1434#endif
1435
1436#ifdef __OPTIMIZE__
1437extern __inline __m512i
1438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1439_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1440{
1441  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1442						     (__v8di) __B,
1443						     (__v8di) __C, imm,
1444						     (__mmask8) -1);
1445}
1446
1447extern __inline __m512i
1448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1449_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
1450				__m512i __C, const int imm)
1451{
1452  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1453						     (__v8di) __B,
1454						     (__v8di) __C, imm,
1455						     (__mmask8) __U);
1456}
1457
1458extern __inline __m512i
1459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1460_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
1461				 __m512i __C, const int imm)
1462{
1463  return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1464						      (__v8di) __B,
1465						      (__v8di) __C,
1466						      imm, (__mmask8) __U);
1467}
1468
1469extern __inline __m512i
1470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1471_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1472{
1473  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1474						     (__v16si) __B,
1475						     (__v16si) __C,
1476						     imm, (__mmask16) -1);
1477}
1478
1479extern __inline __m512i
1480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1481_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
1482				__m512i __C, const int imm)
1483{
1484  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1485						     (__v16si) __B,
1486						     (__v16si) __C,
1487						     imm, (__mmask16) __U);
1488}
1489
1490extern __inline __m512i
1491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1492_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
1493				 __m512i __C, const int imm)
1494{
1495  return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1496						      (__v16si) __B,
1497						      (__v16si) __C,
1498						      imm, (__mmask16) __U);
1499}
1500#else
/* Macro spellings of the six ternary-logic wrappers for the #else side of
   the __OPTIMIZE__ test.  The epi64 forms use the pternlogq512 builtins
   with an __mmask8, the epi32 forms the pternlogd512 builtins with an
   __mmask16; the maskz forms call the *_maskz builtin.  */
#define _mm512_ternarylogic_epi64(X, Y, Z, SEL) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (X), \
    (__v8di) (__m512i) (Y), (__v8di) (__m512i) (Z), (int) (SEL), \
    (__mmask8) -1))
#define _mm512_mask_ternarylogic_epi64(X, MASK, Y, Z, SEL) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (X), \
    (__v8di) (__m512i) (Y), (__v8di) (__m512i) (Z), (int) (SEL), \
    (__mmask8) (MASK)))
#define _mm512_maskz_ternarylogic_epi64(MASK, X, Y, Z, SEL) \
  ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) (__m512i) (X), \
    (__v8di) (__m512i) (Y), (__v8di) (__m512i) (Z), (int) (SEL), \
    (__mmask8) (MASK)))
#define _mm512_ternarylogic_epi32(X, Y, Z, SEL) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (X), \
    (__v16si) (__m512i) (Y), (__v16si) (__m512i) (Z), (int) (SEL), \
    (__mmask16) -1))
#define _mm512_mask_ternarylogic_epi32(X, MASK, Y, Z, SEL) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (X), \
    (__v16si) (__m512i) (Y), (__v16si) (__m512i) (Z), (int) (SEL), \
    (__mmask16) (MASK)))
#define _mm512_maskz_ternarylogic_epi32(MASK, X, Y, Z, SEL) \
  ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) (__m512i) (X), \
    (__v16si) (__m512i) (Y), (__v16si) (__m512i) (Z), (int) (SEL), \
    (__mmask16) (MASK)))
1522#endif
1523
1524extern __inline __m512d
1525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1526_mm512_rcp14_pd (__m512d __A)
1527{
1528  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1529						   (__v8df)
1530						   _mm512_undefined_pd (),
1531						   (__mmask8) -1);
1532}
1533
1534extern __inline __m512d
1535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1536_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1537{
1538  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1539						   (__v8df) __W,
1540						   (__mmask8) __U);
1541}
1542
1543extern __inline __m512d
1544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1545_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1546{
1547  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1548						   (__v8df)
1549						   _mm512_setzero_pd (),
1550						   (__mmask8) __U);
1551}
1552
1553extern __inline __m512
1554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1555_mm512_rcp14_ps (__m512 __A)
1556{
1557  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1558						  (__v16sf)
1559						  _mm512_undefined_ps (),
1560						  (__mmask16) -1);
1561}
1562
1563extern __inline __m512
1564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1565_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1566{
1567  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1568						  (__v16sf) __W,
1569						  (__mmask16) __U);
1570}
1571
1572extern __inline __m512
1573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1574_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1575{
1576  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1577						  (__v16sf)
1578						  _mm512_setzero_ps (),
1579						  (__mmask16) __U);
1580}
1581
1582extern __inline __m128d
1583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1584_mm_rcp14_sd (__m128d __A, __m128d __B)
1585{
1586  return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1587					   (__v2df) __A);
1588}
1589
1590extern __inline __m128
1591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1592_mm_rcp14_ss (__m128 __A, __m128 __B)
1593{
1594  return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1595					  (__v4sf) __A);
1596}
1597
1598extern __inline __m512d
1599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1600_mm512_rsqrt14_pd (__m512d __A)
1601{
1602  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1603						     (__v8df)
1604						     _mm512_undefined_pd (),
1605						     (__mmask8) -1);
1606}
1607
1608extern __inline __m512d
1609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1610_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1611{
1612  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1613						     (__v8df) __W,
1614						     (__mmask8) __U);
1615}
1616
1617extern __inline __m512d
1618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1619_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1620{
1621  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1622						     (__v8df)
1623						     _mm512_setzero_pd (),
1624						     (__mmask8) __U);
1625}
1626
1627extern __inline __m512
1628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1629_mm512_rsqrt14_ps (__m512 __A)
1630{
1631  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1632						    (__v16sf)
1633						    _mm512_undefined_ps (),
1634						    (__mmask16) -1);
1635}
1636
1637extern __inline __m512
1638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1639_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1640{
1641  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1642						    (__v16sf) __W,
1643						    (__mmask16) __U);
1644}
1645
1646extern __inline __m512
1647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1648_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1649{
1650  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1651						    (__v16sf)
1652						    _mm512_setzero_ps (),
1653						    (__mmask16) __U);
1654}
1655
1656extern __inline __m128d
1657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1658_mm_rsqrt14_sd (__m128d __A, __m128d __B)
1659{
1660  return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1661					     (__v2df) __A);
1662}
1663
1664extern __inline __m128
1665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1666_mm_rsqrt14_ss (__m128 __A, __m128 __B)
1667{
1668  return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1669					    (__v4sf) __A);
1670}
1671
1672#ifdef __OPTIMIZE__
1673extern __inline __m512d
1674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1675_mm512_sqrt_round_pd (__m512d __A, const int __R)
1676{
1677  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1678						  (__v8df)
1679						  _mm512_undefined_pd (),
1680						  (__mmask8) -1, __R);
1681}
1682
1683extern __inline __m512d
1684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1685_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1686			   const int __R)
1687{
1688  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1689						  (__v8df) __W,
1690						  (__mmask8) __U, __R);
1691}
1692
1693extern __inline __m512d
1694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1695_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1696{
1697  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1698						  (__v8df)
1699						  _mm512_setzero_pd (),
1700						  (__mmask8) __U, __R);
1701}
1702
1703extern __inline __m512
1704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705_mm512_sqrt_round_ps (__m512 __A, const int __R)
1706{
1707  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1708						 (__v16sf)
1709						 _mm512_undefined_ps (),
1710						 (__mmask16) -1, __R);
1711}
1712
1713extern __inline __m512
1714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1715_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1716{
1717  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1718						 (__v16sf) __W,
1719						 (__mmask16) __U, __R);
1720}
1721
1722extern __inline __m512
1723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1724_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1725{
1726  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1727						 (__v16sf)
1728						 _mm512_setzero_ps (),
1729						 (__mmask16) __U, __R);
1730}
1731
1732extern __inline __m128d
1733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1734_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1735{
1736  return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
1737						(__v2df) __A,
1738						__R);
1739}
1740
1741extern __inline __m128
1742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1743_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1744{
1745  return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
1746					       (__v4sf) __A,
1747					       __R);
1748}
1749#else
/* Macro spellings of the sqrt_round wrappers for the #else side of the
   __OPTIMIZE__ test.  They mirror the inline definitions operand for
   operand: the scalar _sd/_ss forms pass B to the builtin first and A
   second, exactly as the inline functions do, so builds with and without
   __OPTIMIZE__ compute the same result.  Every expansion is fully
   parenthesized and casts its operands like the inline versions.  */
#define _mm512_sqrt_round_pd(A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) (__m512d) (A), \
    (__v8df) _mm512_undefined_pd (), (__mmask8) -1, (int) (C)))

#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) (__m512d) (A), \
    (__v8df) (__m512d) (W), (__mmask8) (U), (int) (C)))

#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) (__m512d) (A), \
    (__v8df) _mm512_setzero_pd (), (__mmask8) (U), (int) (C)))

#define _mm512_sqrt_round_ps(A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) (__m512) (A), \
    (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (int) (C)))

#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) (__m512) (A), \
    (__v16sf) (__m512) (W), (__mmask16) (U), (int) (C)))

#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) (__m512) (A), \
    (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (int) (C)))

/* B goes first here, matching the inline _mm_sqrt_round_sd.  */
#define _mm_sqrt_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_round ((__v2df) (__m128d) (B), \
    (__v2df) (__m128d) (A), (int) (C)))

/* B goes first here, matching the inline _mm_sqrt_round_ss.  */
#define _mm_sqrt_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_round ((__v4sf) (__m128) (B), \
    (__v4sf) (__m128) (A), (int) (C)))
1773#endif
1774
1775extern __inline __m512i
1776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1777_mm512_cvtepi8_epi32 (__m128i __A)
1778{
1779  return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1780						    (__v16si)
1781						    _mm512_undefined_si512 (),
1782						    (__mmask16) -1);
1783}
1784
1785extern __inline __m512i
1786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1787_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1788{
1789  return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1790						    (__v16si) __W,
1791						    (__mmask16) __U);
1792}
1793
1794extern __inline __m512i
1795__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1796_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
1797{
1798  return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1799						    (__v16si)
1800						    _mm512_setzero_si512 (),
1801						    (__mmask16) __U);
1802}
1803
1804extern __inline __m512i
1805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1806_mm512_cvtepi8_epi64 (__m128i __A)
1807{
1808  return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1809						    (__v8di)
1810						    _mm512_undefined_si512 (),
1811						    (__mmask8) -1);
1812}
1813
1814extern __inline __m512i
1815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1816_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1817{
1818  return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1819						    (__v8di) __W,
1820						    (__mmask8) __U);
1821}
1822
1823extern __inline __m512i
1824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1825_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
1826{
1827  return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1828						    (__v8di)
1829						    _mm512_setzero_si512 (),
1830						    (__mmask8) __U);
1831}
1832
1833extern __inline __m512i
1834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1835_mm512_cvtepi16_epi32 (__m256i __A)
1836{
1837  return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1838						    (__v16si)
1839						    _mm512_undefined_si512 (),
1840						    (__mmask16) -1);
1841}
1842
1843extern __inline __m512i
1844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1845_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1846{
1847  return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1848						    (__v16si) __W,
1849						    (__mmask16) __U);
1850}
1851
1852extern __inline __m512i
1853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1854_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
1855{
1856  return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1857						    (__v16si)
1858						    _mm512_setzero_si512 (),
1859						    (__mmask16) __U);
1860}
1861
1862extern __inline __m512i
1863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1864_mm512_cvtepi16_epi64 (__m128i __A)
1865{
1866  return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1867						    (__v8di)
1868						    _mm512_undefined_si512 (),
1869						    (__mmask8) -1);
1870}
1871
1872extern __inline __m512i
1873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1874_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1875{
1876  return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1877						    (__v8di) __W,
1878						    (__mmask8) __U);
1879}
1880
1881extern __inline __m512i
1882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1883_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
1884{
1885  return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1886						    (__v8di)
1887						    _mm512_setzero_si512 (),
1888						    (__mmask8) __U);
1889}
1890
1891extern __inline __m512i
1892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1893_mm512_cvtepi32_epi64 (__m256i __X)
1894{
1895  return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1896						    (__v8di)
1897						    _mm512_undefined_si512 (),
1898						    (__mmask8) -1);
1899}
1900
1901extern __inline __m512i
1902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1903_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1904{
1905  return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1906						    (__v8di) __W,
1907						    (__mmask8) __U);
1908}
1909
1910extern __inline __m512i
1911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1912_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
1913{
1914  return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1915						    (__v8di)
1916						    _mm512_setzero_si512 (),
1917						    (__mmask8) __U);
1918}
1919
1920extern __inline __m512i
1921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1922_mm512_cvtepu8_epi32 (__m128i __A)
1923{
1924  return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1925						    (__v16si)
1926						    _mm512_undefined_si512 (),
1927						    (__mmask16) -1);
1928}
1929
1930extern __inline __m512i
1931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1932_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1933{
1934  return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1935						    (__v16si) __W,
1936						    (__mmask16) __U);
1937}
1938
1939extern __inline __m512i
1940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1941_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
1942{
1943  return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1944						    (__v16si)
1945						    _mm512_setzero_si512 (),
1946						    (__mmask16) __U);
1947}
1948
1949extern __inline __m512i
1950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1951_mm512_cvtepu8_epi64 (__m128i __A)
1952{
1953  return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1954						    (__v8di)
1955						    _mm512_undefined_si512 (),
1956						    (__mmask8) -1);
1957}
1958
1959extern __inline __m512i
1960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1961_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1962{
1963  return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1964						    (__v8di) __W,
1965						    (__mmask8) __U);
1966}
1967
1968extern __inline __m512i
1969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1970_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
1971{
1972  return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1973						    (__v8di)
1974						    _mm512_setzero_si512 (),
1975						    (__mmask8) __U);
1976}
1977
1978extern __inline __m512i
1979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1980_mm512_cvtepu16_epi32 (__m256i __A)
1981{
1982  return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1983						    (__v16si)
1984						    _mm512_undefined_si512 (),
1985						    (__mmask16) -1);
1986}
1987
1988extern __inline __m512i
1989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1990_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1991{
1992  return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1993						    (__v16si) __W,
1994						    (__mmask16) __U);
1995}
1996
1997extern __inline __m512i
1998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1999_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2000{
2001  return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2002						    (__v16si)
2003						    _mm512_setzero_si512 (),
2004						    (__mmask16) __U);
2005}
2006
2007extern __inline __m512i
2008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2009_mm512_cvtepu16_epi64 (__m128i __A)
2010{
2011  return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2012						    (__v8di)
2013						    _mm512_undefined_si512 (),
2014						    (__mmask8) -1);
2015}
2016
2017extern __inline __m512i
2018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2019_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2020{
2021  return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2022						    (__v8di) __W,
2023						    (__mmask8) __U);
2024}
2025
2026extern __inline __m512i
2027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2029{
2030  return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2031						    (__v8di)
2032						    _mm512_setzero_si512 (),
2033						    (__mmask8) __U);
2034}
2035
2036extern __inline __m512i
2037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2038_mm512_cvtepu32_epi64 (__m256i __X)
2039{
2040  return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2041						    (__v8di)
2042						    _mm512_undefined_si512 (),
2043						    (__mmask8) -1);
2044}
2045
2046extern __inline __m512i
2047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2048_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2049{
2050  return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2051						    (__v8di) __W,
2052						    (__mmask8) __U);
2053}
2054
2055extern __inline __m512i
2056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2058{
2059  return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2060						    (__v8di)
2061						    _mm512_setzero_si512 (),
2062						    (__mmask8) __U);
2063}
2064
2065#ifdef __OPTIMIZE__
2066extern __inline __m512d
2067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2068_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2069{
2070  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2071						 (__v8df) __B,
2072						 (__v8df)
2073						 _mm512_undefined_pd (),
2074						 (__mmask8) -1, __R);
2075}
2076
2077extern __inline __m512d
2078__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2079_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2080			  __m512d __B, const int __R)
2081{
2082  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2083						 (__v8df) __B,
2084						 (__v8df) __W,
2085						 (__mmask8) __U, __R);
2086}
2087
2088extern __inline __m512d
2089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2090_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2091			   const int __R)
2092{
2093  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2094						 (__v8df) __B,
2095						 (__v8df)
2096						 _mm512_setzero_pd (),
2097						 (__mmask8) __U, __R);
2098}
2099
2100extern __inline __m512
2101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2102_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2103{
2104  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2105						(__v16sf) __B,
2106						(__v16sf)
2107						_mm512_undefined_ps (),
2108						(__mmask16) -1, __R);
2109}
2110
2111extern __inline __m512
2112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2113_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2114			  __m512 __B, const int __R)
2115{
2116  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2117						(__v16sf) __B,
2118						(__v16sf) __W,
2119						(__mmask16) __U, __R);
2120}
2121
2122extern __inline __m512
2123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2124_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2125{
2126  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2127						(__v16sf) __B,
2128						(__v16sf)
2129						_mm512_setzero_ps (),
2130						(__mmask16) __U, __R);
2131}
2132
2133extern __inline __m512d
2134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2135_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2136{
2137  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2138						 (__v8df) __B,
2139						 (__v8df)
2140						 _mm512_undefined_pd (),
2141						 (__mmask8) -1, __R);
2142}
2143
2144extern __inline __m512d
2145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2146_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2147			  __m512d __B, const int __R)
2148{
2149  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2150						 (__v8df) __B,
2151						 (__v8df) __W,
2152						 (__mmask8) __U, __R);
2153}
2154
2155extern __inline __m512d
2156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2157_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2158			   const int __R)
2159{
2160  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2161						 (__v8df) __B,
2162						 (__v8df)
2163						 _mm512_setzero_pd (),
2164						 (__mmask8) __U, __R);
2165}
2166
2167extern __inline __m512
2168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2169_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2170{
2171  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2172						(__v16sf) __B,
2173						(__v16sf)
2174						_mm512_undefined_ps (),
2175						(__mmask16) -1, __R);
2176}
2177
2178extern __inline __m512
2179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2180_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2181			  __m512 __B, const int __R)
2182{
2183  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2184						(__v16sf) __B,
2185						(__v16sf) __W,
2186						(__mmask16) __U, __R);
2187}
2188
2189extern __inline __m512
2190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2191_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2192{
2193  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2194						(__v16sf) __B,
2195						(__v16sf)
2196						_mm512_setzero_ps (),
2197						(__mmask16) __U, __R);
2198}
2199#else
/* Macro forms of the addpd512 rounding wrappers, selected when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in an outer
   pair of parentheses so that a trailing postfix operator applies to the
   cast result, as it would with the inline-function forms.  */
#define _mm512_add_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C))

#define _mm512_maskz_add_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2208
/* Macro forms of the addps512 rounding wrappers, selected when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in an outer
   pair of parentheses so that a trailing postfix operator applies to the
   cast result, as it would with the inline-function forms.  */
#define _mm512_add_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_add_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_addps512_mask(A, B, W, U, C))

#define _mm512_maskz_add_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2217
/* Macro forms of the subpd512 rounding wrappers, selected when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in an outer
   pair of parentheses so that a trailing postfix operator applies to the
   cast result, as it would with the inline-function forms.  */
#define _mm512_sub_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C))

#define _mm512_maskz_sub_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2226
/* Macro forms of the subps512 rounding wrappers, selected when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in an outer
   pair of parentheses so that a trailing postfix operator applies to the
   cast result, as it would with the inline-function forms.  */
#define _mm512_sub_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_subps512_mask(A, B, W, U, C))

#define _mm512_maskz_sub_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2235#endif
2236
2237#ifdef __OPTIMIZE__
2238extern __inline __m512d
2239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2240_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2241{
2242  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2243						 (__v8df) __B,
2244						 (__v8df)
2245						 _mm512_undefined_pd (),
2246						 (__mmask8) -1, __R);
2247}
2248
2249extern __inline __m512d
2250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2251_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2252			  __m512d __B, const int __R)
2253{
2254  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2255						 (__v8df) __B,
2256						 (__v8df) __W,
2257						 (__mmask8) __U, __R);
2258}
2259
2260extern __inline __m512d
2261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2262_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2263			   const int __R)
2264{
2265  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2266						 (__v8df) __B,
2267						 (__v8df)
2268						 _mm512_setzero_pd (),
2269						 (__mmask8) __U, __R);
2270}
2271
2272extern __inline __m512
2273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2274_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2275{
2276  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2277						(__v16sf) __B,
2278						(__v16sf)
2279						_mm512_undefined_ps (),
2280						(__mmask16) -1, __R);
2281}
2282
2283extern __inline __m512
2284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2285_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2286			  __m512 __B, const int __R)
2287{
2288  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2289						(__v16sf) __B,
2290						(__v16sf) __W,
2291						(__mmask16) __U, __R);
2292}
2293
2294extern __inline __m512
2295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2296_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2297{
2298  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2299						(__v16sf) __B,
2300						(__v16sf)
2301						_mm512_setzero_ps (),
2302						(__mmask16) __U, __R);
2303}
2304
2305extern __inline __m512d
2306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2307_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2308{
2309  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2310						 (__v8df) __V,
2311						 (__v8df)
2312						 _mm512_undefined_pd (),
2313						 (__mmask8) -1, __R);
2314}
2315
2316extern __inline __m512d
2317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2318_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2319			  __m512d __V, const int __R)
2320{
2321  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2322						 (__v8df) __V,
2323						 (__v8df) __W,
2324						 (__mmask8) __U, __R);
2325}
2326
2327extern __inline __m512d
2328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2329_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2330			   const int __R)
2331{
2332  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2333						 (__v8df) __V,
2334						 (__v8df)
2335						 _mm512_setzero_pd (),
2336						 (__mmask8) __U, __R);
2337}
2338
2339extern __inline __m512
2340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2341_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2342{
2343  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2344						(__v16sf) __B,
2345						(__v16sf)
2346						_mm512_undefined_ps (),
2347						(__mmask16) -1, __R);
2348}
2349
2350extern __inline __m512
2351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2352_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2353			  __m512 __B, const int __R)
2354{
2355  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2356						(__v16sf) __B,
2357						(__v16sf) __W,
2358						(__mmask16) __U, __R);
2359}
2360
2361extern __inline __m512
2362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2363_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2364{
2365  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2366						(__v16sf) __B,
2367						(__v16sf)
2368						_mm512_setzero_ps (),
2369						(__mmask16) __U, __R);
2370}
2371
2372extern __inline __m128d
2373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2374_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2375{
2376  return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2377					       (__v2df) __B,
2378					       __R);
2379}
2380
2381extern __inline __m128
2382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2383_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2384{
2385  return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2386					      (__v4sf) __B,
2387					      __R);
2388}
2389
2390extern __inline __m128d
2391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2392_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2393{
2394  return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2395					       (__v2df) __B,
2396					       __R);
2397}
2398
2399extern __inline __m128
2400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2401_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2402{
2403  return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2404					      (__v4sf) __B,
2405					      __R);
2406}
2407
2408#else
/* Macro forms of the mulpd512 rounding wrappers, selected when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in an outer
   pair of parentheses so that a trailing postfix operator applies to the
   cast result, as it would with the inline-function forms.  */
#define _mm512_mul_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C))

#define _mm512_maskz_mul_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2417
/* Macro forms of the mulps512 rounding wrappers, selected when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in an outer
   pair of parentheses so that a trailing postfix operator applies to the
   cast result, as it would with the inline-function forms.  */
#define _mm512_mul_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C))

#define _mm512_maskz_mul_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2426
/* Macro forms of the divpd512 rounding wrappers, selected when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in an outer
   pair of parentheses so that a trailing postfix operator applies to the
   cast result, as it would with the inline-function forms.  */
#define _mm512_div_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_div_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C))

#define _mm512_maskz_div_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2435
/* Macro forms of the divps512 rounding wrappers, selected when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in an outer
   pair of parentheses so that a trailing postfix operator applies to the
   cast result, as it would with the inline-function forms.  */
#define _mm512_div_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_div_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_divps512_mask(A, B, W, U, C))

#define _mm512_maskz_div_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2444
/* Macro forms of the scalar mulsd/mulss rounding wrappers, selected when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in an outer
   pair of parentheses so that a trailing postfix operator applies to the
   cast result, as it would with the inline-function forms.  */
#define _mm_mul_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_mulsd_round(A, B, C))

#define _mm_mul_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_mulss_round(A, B, C))
2450
/* Macro forms of the scalar divsd/divss rounding wrappers, selected when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in an outer
   pair of parentheses so that a trailing postfix operator applies to the
   cast result, as it would with the inline-function forms.  */
#define _mm_div_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_divsd_round(A, B, C))

#define _mm_div_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_divss_round(A, B, C))
2456#endif
2457
2458#ifdef __OPTIMIZE__
2459extern __inline __m512d
2460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2461_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2462{
2463  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2464						 (__v8df) __B,
2465						 (__v8df)
2466						 _mm512_undefined_pd (),
2467						 (__mmask8) -1, __R);
2468}
2469
2470extern __inline __m512d
2471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2472_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2473			  __m512d __B, const int __R)
2474{
2475  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2476						 (__v8df) __B,
2477						 (__v8df) __W,
2478						 (__mmask8) __U, __R);
2479}
2480
2481extern __inline __m512d
2482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2483_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2484			   const int __R)
2485{
2486  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2487						 (__v8df) __B,
2488						 (__v8df)
2489						 _mm512_setzero_pd (),
2490						 (__mmask8) __U, __R);
2491}
2492
2493extern __inline __m512
2494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2495_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2496{
2497  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2498						(__v16sf) __B,
2499						(__v16sf)
2500						_mm512_undefined_ps (),
2501						(__mmask16) -1, __R);
2502}
2503
2504extern __inline __m512
2505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2506_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2507			  __m512 __B, const int __R)
2508{
2509  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2510						(__v16sf) __B,
2511						(__v16sf) __W,
2512						(__mmask16) __U, __R);
2513}
2514
2515extern __inline __m512
2516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2517_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2518{
2519  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2520						(__v16sf) __B,
2521						(__v16sf)
2522						_mm512_setzero_ps (),
2523						(__mmask16) __U, __R);
2524}
2525
2526extern __inline __m512d
2527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2528_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2529{
2530  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2531						 (__v8df) __B,
2532						 (__v8df)
2533						 _mm512_undefined_pd (),
2534						 (__mmask8) -1, __R);
2535}
2536
2537extern __inline __m512d
2538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2539_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2540			  __m512d __B, const int __R)
2541{
2542  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2543						 (__v8df) __B,
2544						 (__v8df) __W,
2545						 (__mmask8) __U, __R);
2546}
2547
2548extern __inline __m512d
2549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2550_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2551			   const int __R)
2552{
2553  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2554						 (__v8df) __B,
2555						 (__v8df)
2556						 _mm512_setzero_pd (),
2557						 (__mmask8) __U, __R);
2558}
2559
2560extern __inline __m512
2561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2562_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2563{
2564  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2565						(__v16sf) __B,
2566						(__v16sf)
2567						_mm512_undefined_ps (),
2568						(__mmask16) -1, __R);
2569}
2570
2571extern __inline __m512
2572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2573_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2574			  __m512 __B, const int __R)
2575{
2576  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2577						(__v16sf) __B,
2578						(__v16sf) __W,
2579						(__mmask16) __U, __R);
2580}
2581
2582extern __inline __m512
2583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2584_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2585{
2586  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2587						(__v16sf) __B,
2588						(__v16sf)
2589						_mm512_setzero_ps (),
2590						(__mmask16) __U, __R);
2591}
2592#else
/* Macro forms of the maxpd512 wrappers, selected when __OPTIMIZE__ is
   not defined.  Each expansion is wrapped in an outer pair of
   parentheses so that a trailing postfix operator applies to the cast
   result, as it would with the inline-function forms.  */
#define _mm512_max_round_pd(A, B,  R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))

#define _mm512_mask_max_round_pd(W, U,  A, B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R))

#define _mm512_maskz_max_round_pd(U, A,  B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
2601
/* Macro forms of the maxps512 wrappers, selected when __OPTIMIZE__ is
   not defined.  The unmasked form takes its undefined third operand from
   _mm512_undefined_ps (), matching the __v16sf operand type and the
   inline-function form; it previously used the double-precision
   _mm512_undefined_pd ().  Each expansion is wrapped in an outer pair of
   parentheses so that a trailing postfix operator applies to the cast
   result.  */
#define _mm512_max_round_ps(A, B,  R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))

#define _mm512_mask_max_round_ps(W, U,  A, B, R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R))

#define _mm512_maskz_max_round_ps(U, A,  B, R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
2610
/* Macro forms of the minpd512 wrappers, selected when __OPTIMIZE__ is
   not defined.  Each expansion is wrapped in an outer pair of
   parentheses so that a trailing postfix operator applies to the cast
   result, as it would with the inline-function forms.  */
#define _mm512_min_round_pd(A, B,  R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))

#define _mm512_mask_min_round_pd(W, U,  A, B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R))

#define _mm512_maskz_min_round_pd(U, A,  B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
2619
/* Macro forms of the minps512 wrappers, selected when __OPTIMIZE__ is
   not defined.  Each expansion is wrapped in an outer pair of
   parentheses so that a trailing postfix operator applies to the cast
   result, as it would with the inline-function forms.  */
#define _mm512_min_round_ps(A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))

#define _mm512_mask_min_round_ps(W, U,  A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, W, U, R))

#define _mm512_maskz_min_round_ps(U, A,  B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
2628#endif
2629
2630#ifdef __OPTIMIZE__
2631extern __inline __m512d
2632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2633_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2634{
2635  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2636						    (__v8df) __B,
2637						    (__v8df)
2638						    _mm512_undefined_pd (),
2639						    (__mmask8) -1, __R);
2640}
2641
2642extern __inline __m512d
2643__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2644_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2645			     __m512d __B, const int __R)
2646{
2647  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2648						    (__v8df) __B,
2649						    (__v8df) __W,
2650						    (__mmask8) __U, __R);
2651}
2652
2653extern __inline __m512d
2654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2655_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2656			      const int __R)
2657{
2658  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2659						    (__v8df) __B,
2660						    (__v8df)
2661						    _mm512_setzero_pd (),
2662						    (__mmask8) __U, __R);
2663}
2664
2665extern __inline __m512
2666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2667_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
2668{
2669  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2670						   (__v16sf) __B,
2671						   (__v16sf)
2672						   _mm512_undefined_ps (),
2673						   (__mmask16) -1, __R);
2674}
2675
2676extern __inline __m512
2677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2678_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2679			     __m512 __B, const int __R)
2680{
2681  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2682						   (__v16sf) __B,
2683						   (__v16sf) __W,
2684						   (__mmask16) __U, __R);
2685}
2686
2687extern __inline __m512
2688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2689_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2690			      const int __R)
2691{
2692  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2693						   (__v16sf) __B,
2694						   (__v16sf)
2695						   _mm512_setzero_ps (),
2696						   (__mmask16) __U, __R);
2697}
2698
2699extern __inline __m128d
2700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2701_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
2702{
2703  return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
2704						  (__v2df) __B,
2705						  __R);
2706}
2707
2708extern __inline __m128
2709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2710_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
2711{
2712  return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
2713						 (__v4sf) __B,
2714						 __R);
2715}
2716#else
/* Macro forms of the scalefpd512 rounding wrappers, selected when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in an outer
   pair of parentheses so that a trailing postfix operator applies to the
   cast result, as it would with the inline-function forms.  */
#define _mm512_scalef_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C))

#define _mm512_maskz_scalef_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2725
/* Macro forms of the scalefps512 rounding wrappers, selected when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in an outer
   pair of parentheses so that a trailing postfix operator applies to the
   cast result, as it would with the inline-function forms.  */
#define _mm512_scalef_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C))

#define _mm512_maskz_scalef_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2734
/* Macro forms of the scalar scalefsd/scalefss rounding wrappers,
   selected when __OPTIMIZE__ is not defined.  Each expansion is wrapped
   in an outer pair of parentheses so that a trailing postfix operator
   applies to the cast result, as it would with the inline-function
   forms.  */
#define _mm_scalef_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_scalefsd_round(A, B, C))

#define _mm_scalef_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_scalefss_round(A, B, C))
2740#endif
2741
2742#ifdef __OPTIMIZE__
2743extern __inline __m512d
2744__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2745_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2746{
2747  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2748						    (__v8df) __B,
2749						    (__v8df) __C,
2750						    (__mmask8) -1, __R);
2751}
2752
2753extern __inline __m512d
2754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2755_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2756			    __m512d __C, const int __R)
2757{
2758  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2759						    (__v8df) __B,
2760						    (__v8df) __C,
2761						    (__mmask8) __U, __R);
2762}
2763
2764extern __inline __m512d
2765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2766_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2767			     __mmask8 __U, const int __R)
2768{
2769  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2770						     (__v8df) __B,
2771						     (__v8df) __C,
2772						     (__mmask8) __U, __R);
2773}
2774
2775extern __inline __m512d
2776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2777_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2778			     __m512d __C, const int __R)
2779{
2780  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2781						     (__v8df) __B,
2782						     (__v8df) __C,
2783						     (__mmask8) __U, __R);
2784}
2785
2786extern __inline __m512
2787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2788_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2789{
2790  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2791						   (__v16sf) __B,
2792						   (__v16sf) __C,
2793						   (__mmask16) -1, __R);
2794}
2795
2796extern __inline __m512
2797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2798_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2799			    __m512 __C, const int __R)
2800{
2801  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2802						   (__v16sf) __B,
2803						   (__v16sf) __C,
2804						   (__mmask16) __U, __R);
2805}
2806
2807extern __inline __m512
2808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2809_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2810			     __mmask16 __U, const int __R)
2811{
2812  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2813						    (__v16sf) __B,
2814						    (__v16sf) __C,
2815						    (__mmask16) __U, __R);
2816}
2817
2818extern __inline __m512
2819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2820_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2821			     __m512 __C, const int __R)
2822{
2823  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2824						    (__v16sf) __B,
2825						    (__v16sf) __C,
2826						    (__mmask16) __U, __R);
2827}
2828
2829extern __inline __m512d
2830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2831_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2832{
2833  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2834						    (__v8df) __B,
2835						    -(__v8df) __C,
2836						    (__mmask8) -1, __R);
2837}
2838
2839extern __inline __m512d
2840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2841_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2842			    __m512d __C, const int __R)
2843{
2844  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2845						    (__v8df) __B,
2846						    -(__v8df) __C,
2847						    (__mmask8) __U, __R);
2848}
2849
2850extern __inline __m512d
2851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2852_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2853			     __mmask8 __U, const int __R)
2854{
2855  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2856						     (__v8df) __B,
2857						     (__v8df) __C,
2858						     (__mmask8) __U, __R);
2859}
2860
2861extern __inline __m512d
2862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2863_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2864			     __m512d __C, const int __R)
2865{
2866  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2867						     (__v8df) __B,
2868						     -(__v8df) __C,
2869						     (__mmask8) __U, __R);
2870}
2871
2872extern __inline __m512
2873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2874_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2875{
2876  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2877						   (__v16sf) __B,
2878						   -(__v16sf) __C,
2879						   (__mmask16) -1, __R);
2880}
2881
2882extern __inline __m512
2883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2884_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2885			    __m512 __C, const int __R)
2886{
2887  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2888						   (__v16sf) __B,
2889						   -(__v16sf) __C,
2890						   (__mmask16) __U, __R);
2891}
2892
2893extern __inline __m512
2894__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2895_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2896			     __mmask16 __U, const int __R)
2897{
2898  return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2899						    (__v16sf) __B,
2900						    (__v16sf) __C,
2901						    (__mmask16) __U, __R);
2902}
2903
2904extern __inline __m512
2905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2906_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2907			     __m512 __C, const int __R)
2908{
2909  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2910						    (__v16sf) __B,
2911						    -(__v16sf) __C,
2912						    (__mmask16) __U, __R);
2913}
2914
2915extern __inline __m512d
2916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2917_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2918{
2919  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2920						       (__v8df) __B,
2921						       (__v8df) __C,
2922						       (__mmask8) -1, __R);
2923}
2924
2925extern __inline __m512d
2926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2927_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2928			       __m512d __C, const int __R)
2929{
2930  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2931						       (__v8df) __B,
2932						       (__v8df) __C,
2933						       (__mmask8) __U, __R);
2934}
2935
2936extern __inline __m512d
2937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2938_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2939				__mmask8 __U, const int __R)
2940{
2941  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2942							(__v8df) __B,
2943							(__v8df) __C,
2944							(__mmask8) __U, __R);
2945}
2946
2947extern __inline __m512d
2948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2949_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2950				__m512d __C, const int __R)
2951{
2952  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2953							(__v8df) __B,
2954							(__v8df) __C,
2955							(__mmask8) __U, __R);
2956}
2957
2958extern __inline __m512
2959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2960_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2961{
2962  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2963						      (__v16sf) __B,
2964						      (__v16sf) __C,
2965						      (__mmask16) -1, __R);
2966}
2967
2968extern __inline __m512
2969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2970_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2971			       __m512 __C, const int __R)
2972{
2973  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2974						      (__v16sf) __B,
2975						      (__v16sf) __C,
2976						      (__mmask16) __U, __R);
2977}
2978
2979extern __inline __m512
2980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2981_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2982				__mmask16 __U, const int __R)
2983{
2984  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2985						       (__v16sf) __B,
2986						       (__v16sf) __C,
2987						       (__mmask16) __U, __R);
2988}
2989
2990extern __inline __m512
2991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2992_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2993				__m512 __C, const int __R)
2994{
2995  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2996						       (__v16sf) __B,
2997						       (__v16sf) __C,
2998						       (__mmask16) __U, __R);
2999}
3000
3001extern __inline __m512d
3002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3003_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3004{
3005  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3006						       (__v8df) __B,
3007						       -(__v8df) __C,
3008						       (__mmask8) -1, __R);
3009}
3010
3011extern __inline __m512d
3012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3013_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3014			       __m512d __C, const int __R)
3015{
3016  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3017						       (__v8df) __B,
3018						       -(__v8df) __C,
3019						       (__mmask8) __U, __R);
3020}
3021
3022extern __inline __m512d
3023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3024_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3025				__mmask8 __U, const int __R)
3026{
3027  return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3028							(__v8df) __B,
3029							(__v8df) __C,
3030							(__mmask8) __U, __R);
3031}
3032
3033extern __inline __m512d
3034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3035_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3036				__m512d __C, const int __R)
3037{
3038  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3039							(__v8df) __B,
3040							-(__v8df) __C,
3041							(__mmask8) __U, __R);
3042}
3043
3044extern __inline __m512
3045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3046_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3047{
3048  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3049						      (__v16sf) __B,
3050						      -(__v16sf) __C,
3051						      (__mmask16) -1, __R);
3052}
3053
3054extern __inline __m512
3055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3056_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3057			       __m512 __C, const int __R)
3058{
3059  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3060						      (__v16sf) __B,
3061						      -(__v16sf) __C,
3062						      (__mmask16) __U, __R);
3063}
3064
3065extern __inline __m512
3066__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3067_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3068				__mmask16 __U, const int __R)
3069{
3070  return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3071						       (__v16sf) __B,
3072						       (__v16sf) __C,
3073						       (__mmask16) __U, __R);
3074}
3075
3076extern __inline __m512
3077__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3078_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3079				__m512 __C, const int __R)
3080{
3081  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3082						       (__v16sf) __B,
3083						       -(__v16sf) __C,
3084						       (__mmask16) __U, __R);
3085}
3086
3087extern __inline __m512d
3088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3089_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3090{
3091  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3092						    (__v8df) __B,
3093						    (__v8df) __C,
3094						    (__mmask8) -1, __R);
3095}
3096
3097extern __inline __m512d
3098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3099_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3100			     __m512d __C, const int __R)
3101{
3102  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3103						     (__v8df) __B,
3104						     (__v8df) __C,
3105						     (__mmask8) __U, __R);
3106}
3107
3108extern __inline __m512d
3109__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3110_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3111			      __mmask8 __U, const int __R)
3112{
3113  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3114						     (__v8df) __B,
3115						     (__v8df) __C,
3116						     (__mmask8) __U, __R);
3117}
3118
3119extern __inline __m512d
3120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3121_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3122			      __m512d __C, const int __R)
3123{
3124  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3125						     (__v8df) __B,
3126						     (__v8df) __C,
3127						     (__mmask8) __U, __R);
3128}
3129
3130extern __inline __m512
3131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3132_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3133{
3134  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3135						   (__v16sf) __B,
3136						   (__v16sf) __C,
3137						   (__mmask16) -1, __R);
3138}
3139
3140extern __inline __m512
3141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3142_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3143			     __m512 __C, const int __R)
3144{
3145  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3146						    (__v16sf) __B,
3147						    (__v16sf) __C,
3148						    (__mmask16) __U, __R);
3149}
3150
3151extern __inline __m512
3152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3153_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3154			      __mmask16 __U, const int __R)
3155{
3156  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3157						    (__v16sf) __B,
3158						    (__v16sf) __C,
3159						    (__mmask16) __U, __R);
3160}
3161
3162extern __inline __m512
3163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3164_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3165			      __m512 __C, const int __R)
3166{
3167  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3168						    (__v16sf) __B,
3169						    (__v16sf) __C,
3170						    (__mmask16) __U, __R);
3171}
3172
3173extern __inline __m512d
3174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3175_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3176{
3177  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3178						    (__v8df) __B,
3179						    -(__v8df) __C,
3180						    (__mmask8) -1, __R);
3181}
3182
3183extern __inline __m512d
3184__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3185_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3186			     __m512d __C, const int __R)
3187{
3188  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3189						     (__v8df) __B,
3190						     (__v8df) __C,
3191						     (__mmask8) __U, __R);
3192}
3193
3194extern __inline __m512d
3195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3196_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3197			      __mmask8 __U, const int __R)
3198{
3199  return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3200						      (__v8df) __B,
3201						      (__v8df) __C,
3202						      (__mmask8) __U, __R);
3203}
3204
3205extern __inline __m512d
3206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3207_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3208			      __m512d __C, const int __R)
3209{
3210  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3211						     (__v8df) __B,
3212						     -(__v8df) __C,
3213						     (__mmask8) __U, __R);
3214}
3215
3216extern __inline __m512
3217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3218_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3219{
3220  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3221						   (__v16sf) __B,
3222						   -(__v16sf) __C,
3223						   (__mmask16) -1, __R);
3224}
3225
3226extern __inline __m512
3227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3228_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3229			     __m512 __C, const int __R)
3230{
3231  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3232						    (__v16sf) __B,
3233						    (__v16sf) __C,
3234						    (__mmask16) __U, __R);
3235}
3236
3237extern __inline __m512
3238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3239_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3240			      __mmask16 __U, const int __R)
3241{
3242  return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3243						     (__v16sf) __B,
3244						     (__v16sf) __C,
3245						     (__mmask16) __U, __R);
3246}
3247
3248extern __inline __m512
3249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3250_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3251			      __m512 __C, const int __R)
3252{
3253  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3254						    (__v16sf) __B,
3255						    -(__v16sf) __C,
3256						    (__mmask16) __U, __R);
3257}
3258#else
/* Macro forms of the *fmadd_round_pd intrinsics, used when __OPTIMIZE__ is
   not defined.  The whole expansion and every argument are parenthesized
   so that a postfix operator applied to the macro result cannot bind to
   the builtin call ahead of the cast.  */
#define _mm512_fmadd_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask((A), (B), (C), -1, (R)))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask((A), (B), (C), (U), (R)))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask3((A), (B), (C), (U), (R)))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_maskz((A), (B), (C), (U), (R)))
3270
/* Macro forms of the *fmadd_round_ps intrinsics, used when __OPTIMIZE__ is
   not defined.  The whole expansion and every argument are parenthesized
   so that a postfix operator applied to the macro result cannot bind to
   the builtin call ahead of the cast.  */
#define _mm512_fmadd_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddps512_mask((A), (B), (C), -1, (R)))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddps512_mask((A), (B), (C), (U), (R)))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_mask3((A), (B), (C), (U), (R)))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_maskz((A), (B), (C), (U), (R)))
3282
/* Macro forms of the *fmsub_round_pd intrinsics, used when __OPTIMIZE__ is
   not defined.  The whole expansion and every argument are parenthesized
   so that a postfix operator applied to the macro result cannot bind to
   the builtin call ahead of the cast.  */
#define _mm512_fmsub_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask((A), (B), -(C), -1, (R)))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask((A), (B), -(C), (U), (R)))

#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmsubpd512_mask3((A), (B), (C), (U), (R)))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_maskz((A), (B), -(C), (U), (R)))
3294
/* Macro forms of the *fmsub_round_ps intrinsics, used when __OPTIMIZE__ is
   not defined.  The whole expansion and every argument are parenthesized
   so that a postfix operator applied to the macro result cannot bind to
   the builtin call ahead of the cast.  */
#define _mm512_fmsub_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddps512_mask((A), (B), -(C), -1, (R)))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddps512_mask((A), (B), -(C), (U), (R)))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmsubps512_mask3((A), (B), (C), (U), (R)))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_maskz((A), (B), -(C), (U), (R)))
3306
/* Macro form used when __OPTIMIZE__ is not defined.  Parenthesized as a
   whole so a postfix operator on the result cannot bind ahead of the
   cast.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((A), (B), (C), -1, (R)))
3309
/* Macro form used when __OPTIMIZE__ is not defined.  It must expand to the
   vfmaddsubpd512_mask builtin, the same one the inline definition of this
   intrinsic calls; expanding to vfmaddpd512_mask made the two build modes
   perform different operations.  Parenthesized as a whole so a postfix
   operator on the result cannot bind ahead of the cast.  */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((A), (B), (C), (U), (R)))
3312
/* Macro forms used when __OPTIMIZE__ is not defined.  Parenthesized as a
   whole so a postfix operator on the result cannot bind ahead of the
   cast.  */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((A), (B), (C), (U), (R)))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((A), (B), (C), (U), (R)))
3318
/* Macro forms of the *fmaddsub_round_ps intrinsics, used when __OPTIMIZE__
   is not defined.  The whole expansion and every argument are
   parenthesized so that a postfix operator applied to the macro result
   cannot bind to the builtin call ahead of the cast.  */
#define _mm512_fmaddsub_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask((A), (B), (C), -1, (R)))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask((A), (B), (C), (U), (R)))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask3((A), (B), (C), (U), (R)))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddsubps512_maskz((A), (B), (C), (U), (R)))
3330
/* Macro forms of the *fmsubadd_round_pd intrinsics, used when __OPTIMIZE__
   is not defined.  The whole expansion and every argument are
   parenthesized so that a postfix operator applied to the macro result
   cannot bind to the builtin call ahead of the cast.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((A), (B), -(C), -1, (R)))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((A), (B), -(C), (U), (R)))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((A), (B), (C), (U), (R)))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((A), (B), -(C), (U), (R)))
3342
/* Macro forms of the *fmsubadd_round_ps intrinsics, used when __OPTIMIZE__
   is not defined.  The whole expansion and every argument are
   parenthesized so that a postfix operator applied to the macro result
   cannot bind to the builtin call ahead of the cast.  */
#define _mm512_fmsubadd_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask((A), (B), -(C), -1, (R)))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask((A), (B), -(C), (U), (R)))

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmsubaddps512_mask3((A), (B), (C), (U), (R)))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddsubps512_maskz((A), (B), -(C), (U), (R)))
3354
/* Macro form used when __OPTIMIZE__ is not defined.  Parenthesized as a
   whole so a postfix operator on the result cannot bind ahead of the
   cast.  */
#define _mm512_fnmadd_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), (B), (C), -1, (R)))
3357
/* Macro form used when __OPTIMIZE__ is not defined.  A is passed to the
   vfnmaddpd512_mask builtin unnegated, exactly as the inline definition of
   this intrinsic does; the previous -(A) negated the operand on top of the
   builtin's own fnmadd form and handed the builtin a negated first
   operand.  Parenthesized as a whole so a postfix operator on the result
   cannot bind ahead of the cast.  */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfnmaddpd512_mask((A), (B), (C), (U), (R)))
3360
/* Macro forms used when __OPTIMIZE__ is not defined.  Parenthesized as a
   whole so a postfix operator on the result cannot bind ahead of the
   cast.  */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), (B), (C), (U), (R)))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), (B), (C), (U), (R)))
3366
/* Macro form used when __OPTIMIZE__ is not defined.  Parenthesized as a
   whole so a postfix operator on the result cannot bind ahead of the
   cast.  */
#define _mm512_fnmadd_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddps512_mask(-(A), (B), (C), -1, (R)))
3369
/* Macro form used when __OPTIMIZE__ is not defined.  A is passed to the
   vfnmaddps512_mask builtin unnegated, exactly as the inline definition of
   this intrinsic does; the previous -(A) negated the operand on top of the
   builtin's own fnmadd form and handed the builtin a negated first
   operand.  Parenthesized as a whole so a postfix operator on the result
   cannot bind ahead of the cast.  */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfnmaddps512_mask((A), (B), (C), (U), (R)))
3372
/* Macro forms used when __OPTIMIZE__ is not defined.  Parenthesized as a
   whole so a postfix operator on the result cannot bind ahead of the
   cast.  */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_mask3(-(A), (B), (C), (U), (R)))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_maskz(-(A), (B), (C), (U), (R)))
3378
/* Macro forms of the *fnmsub_round_pd intrinsics, used when __OPTIMIZE__ is
   not defined.  The whole expansion and every argument are parenthesized
   so that a postfix operator applied to the macro result cannot bind to
   the builtin call ahead of the cast.  */
#define _mm512_fnmsub_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), (B), -(C), -1, (R)))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfnmsubpd512_mask((A), (B), (C), (U), (R)))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfnmsubpd512_mask3((A), (B), (C), (U), (R)))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), (B), -(C), (U), (R)))
3390
/* Macro forms of the *fnmsub_round_ps intrinsics, used when __OPTIMIZE__ is
   not defined.  The whole expansion and every argument are parenthesized
   so that a postfix operator applied to the macro result cannot bind to
   the builtin call ahead of the cast.  */
#define _mm512_fnmsub_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddps512_mask(-(A), (B), -(C), -1, (R)))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfnmsubps512_mask((A), (B), (C), (U), (R)))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfnmsubps512_mask3((A), (B), (C), (U), (R)))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_maskz(-(A), (B), -(C), (U), (R)))
3402#endif
3403
3404extern __inline __m512i
3405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3406_mm512_abs_epi64 (__m512i __A)
3407{
3408  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3409						 (__v8di)
3410						 _mm512_undefined_si512 (),
3411						 (__mmask8) -1);
3412}
3413
3414extern __inline __m512i
3415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3416_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3417{
3418  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3419						 (__v8di) __W,
3420						 (__mmask8) __U);
3421}
3422
3423extern __inline __m512i
3424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3425_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3426{
3427  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3428						 (__v8di)
3429						 _mm512_setzero_si512 (),
3430						 (__mmask8) __U);
3431}
3432
3433extern __inline __m512i
3434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3435_mm512_abs_epi32 (__m512i __A)
3436{
3437  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3438						 (__v16si)
3439						 _mm512_undefined_si512 (),
3440						 (__mmask16) -1);
3441}
3442
3443extern __inline __m512i
3444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3445_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3446{
3447  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3448						 (__v16si) __W,
3449						 (__mmask16) __U);
3450}
3451
3452extern __inline __m512i
3453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3454_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3455{
3456  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3457						 (__v16si)
3458						 _mm512_setzero_si512 (),
3459						 (__mmask16) __U);
3460}
3461
3462extern __inline __m512
3463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3464_mm512_broadcastss_ps (__m128 __A)
3465{
3466  return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3467						 (__v16sf)
3468						 _mm512_undefined_ps (),
3469						 (__mmask16) -1);
3470}
3471
3472extern __inline __m512
3473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3474_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3475{
3476  return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3477						 (__v16sf) __O, __M);
3478}
3479
3480extern __inline __m512
3481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3482_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3483{
3484  return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3485						 (__v16sf)
3486						 _mm512_setzero_ps (),
3487						 __M);
3488}
3489
3490extern __inline __m512d
3491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3492_mm512_broadcastsd_pd (__m128d __A)
3493{
3494  return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3495						  (__v8df)
3496						  _mm512_undefined_pd (),
3497						  (__mmask8) -1);
3498}
3499
3500extern __inline __m512d
3501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3502_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3503{
3504  return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3505						  (__v8df) __O, __M);
3506}
3507
3508extern __inline __m512d
3509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3510_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3511{
3512  return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3513						  (__v8df)
3514						  _mm512_setzero_pd (),
3515						  __M);
3516}
3517
3518extern __inline __m512i
3519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3520_mm512_broadcastd_epi32 (__m128i __A)
3521{
3522  return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3523						  (__v16si)
3524						  _mm512_undefined_si512 (),
3525						  (__mmask16) -1);
3526}
3527
3528extern __inline __m512i
3529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3530_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3531{
3532  return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3533						  (__v16si) __O, __M);
3534}
3535
3536extern __inline __m512i
3537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3538_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3539{
3540  return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3541						  (__v16si)
3542						  _mm512_setzero_si512 (),
3543						  __M);
3544}
3545
3546extern __inline __m512i
3547__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3548_mm512_set1_epi32 (int __A)
3549{
3550  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3551							   (__v16si)
3552							   _mm512_undefined_si512 (),
3553							   (__mmask16)(-1));
3554}
3555
3556extern __inline __m512i
3557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3558_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3559{
3560  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3561							   __M);
3562}
3563
3564extern __inline __m512i
3565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3566_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3567{
3568  return (__m512i)
3569	 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3570						 (__v16si) _mm512_setzero_si512 (),
3571						 __M);
3572}
3573
3574extern __inline __m512i
3575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3576_mm512_broadcastq_epi64 (__m128i __A)
3577{
3578  return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3579						  (__v8di)
3580						  _mm512_undefined_si512 (),
3581						  (__mmask8) -1);
3582}
3583
3584extern __inline __m512i
3585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3586_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3587{
3588  return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3589						  (__v8di) __O, __M);
3590}
3591
3592extern __inline __m512i
3593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3594_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3595{
3596  return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3597						  (__v8di)
3598						  _mm512_setzero_si512 (),
3599						  __M);
3600}
3601
3602extern __inline __m512i
3603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3604_mm512_set1_epi64 (long long __A)
3605{
3606  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3607							   (__v8di)
3608							   _mm512_undefined_si512 (),
3609							   (__mmask8)(-1));
3610}
3611
3612extern __inline __m512i
3613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3614_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3615{
3616  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3617							   __M);
3618}
3619
3620extern __inline __m512i
3621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3622_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3623{
3624  return (__m512i)
3625	 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3626						 (__v8di) _mm512_setzero_si512 (),
3627						 __M);
3628}
3629
3630extern __inline __m512
3631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3632_mm512_broadcast_f32x4 (__m128 __A)
3633{
3634  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3635						     (__v16sf)
3636						     _mm512_undefined_ps (),
3637						     (__mmask16) -1);
3638}
3639
3640extern __inline __m512
3641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3642_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3643{
3644  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3645						     (__v16sf) __O,
3646						     __M);
3647}
3648
3649extern __inline __m512
3650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3651_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3652{
3653  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3654						     (__v16sf)
3655						     _mm512_setzero_ps (),
3656						     __M);
3657}
3658
3659extern __inline __m512i
3660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3661_mm512_broadcast_i32x4 (__m128i __A)
3662{
3663  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3664						      (__v16si)
3665						      _mm512_undefined_si512 (),
3666						      (__mmask16) -1);
3667}
3668
3669extern __inline __m512i
3670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3671_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
3672{
3673  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3674						      (__v16si) __O,
3675						      __M);
3676}
3677
3678extern __inline __m512i
3679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3680_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
3681{
3682  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3683						      (__v16si)
3684						      _mm512_setzero_si512 (),
3685						      __M);
3686}
3687
3688extern __inline __m512d
3689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3690_mm512_broadcast_f64x4 (__m256d __A)
3691{
3692  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3693						      (__v8df)
3694						      _mm512_undefined_pd (),
3695						      (__mmask8) -1);
3696}
3697
3698extern __inline __m512d
3699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3700_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
3701{
3702  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3703						      (__v8df) __O,
3704						      __M);
3705}
3706
3707extern __inline __m512d
3708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3709_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
3710{
3711  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3712						      (__v8df)
3713						      _mm512_setzero_pd (),
3714						      __M);
3715}
3716
3717extern __inline __m512i
3718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3719_mm512_broadcast_i64x4 (__m256i __A)
3720{
3721  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3722						      (__v8di)
3723						      _mm512_undefined_si512 (),
3724						      (__mmask8) -1);
3725}
3726
3727extern __inline __m512i
3728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3729_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
3730{
3731  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3732						      (__v8di) __O,
3733						      __M);
3734}
3735
3736extern __inline __m512i
3737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
3739{
3740  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3741						      (__v8di)
3742						      _mm512_setzero_si512 (),
3743						      __M);
3744}
3745
/* Immediate values accepted by the _mm512_shuffle_epi32 family.  Each of
   the four letters in a name stands for a 2-bit field (A = 0, B = 1,
   C = 2, D = 3); the leftmost letter occupies bits 7:6 and the rightmost
   letter bits 1:0, so the 256 enumerators cover 0x00..0xFF in order.
   Every row pins its first enumerator explicitly; the remaining three
   take the next consecutive values.  */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB, _MM_PERM_AAAC, _MM_PERM_AAAD,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB, _MM_PERM_AABC, _MM_PERM_AABD,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB, _MM_PERM_AACC, _MM_PERM_AACD,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB, _MM_PERM_AADC, _MM_PERM_AADD,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB, _MM_PERM_ABAC, _MM_PERM_ABAD,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB, _MM_PERM_ABBC, _MM_PERM_ABBD,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB, _MM_PERM_ABCC, _MM_PERM_ABCD,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB, _MM_PERM_ABDC, _MM_PERM_ABDD,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB, _MM_PERM_ACAC, _MM_PERM_ACAD,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB, _MM_PERM_ACBC, _MM_PERM_ACBD,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB, _MM_PERM_ACCC, _MM_PERM_ACCD,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB, _MM_PERM_ACDC, _MM_PERM_ACDD,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB, _MM_PERM_ADAC, _MM_PERM_ADAD,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB, _MM_PERM_ADBC, _MM_PERM_ADBD,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB, _MM_PERM_ADCC, _MM_PERM_ADCD,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB, _MM_PERM_ADDC, _MM_PERM_ADDD,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB, _MM_PERM_BAAC, _MM_PERM_BAAD,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB, _MM_PERM_BABC, _MM_PERM_BABD,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB, _MM_PERM_BACC, _MM_PERM_BACD,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB, _MM_PERM_BADC, _MM_PERM_BADD,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB, _MM_PERM_BBAC, _MM_PERM_BBAD,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB, _MM_PERM_BBBC, _MM_PERM_BBBD,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB, _MM_PERM_BBCC, _MM_PERM_BBCD,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB, _MM_PERM_BBDC, _MM_PERM_BBDD,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB, _MM_PERM_BCAC, _MM_PERM_BCAD,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB, _MM_PERM_BCBC, _MM_PERM_BCBD,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB, _MM_PERM_BCCC, _MM_PERM_BCCD,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB, _MM_PERM_BCDC, _MM_PERM_BCDD,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB, _MM_PERM_BDAC, _MM_PERM_BDAD,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB, _MM_PERM_BDBC, _MM_PERM_BDBD,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB, _MM_PERM_BDCC, _MM_PERM_BDCD,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB, _MM_PERM_BDDC, _MM_PERM_BDDD,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB, _MM_PERM_CAAC, _MM_PERM_CAAD,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB, _MM_PERM_CABC, _MM_PERM_CABD,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB, _MM_PERM_CACC, _MM_PERM_CACD,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB, _MM_PERM_CADC, _MM_PERM_CADD,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB, _MM_PERM_CBAC, _MM_PERM_CBAD,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB, _MM_PERM_CBBC, _MM_PERM_CBBD,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB, _MM_PERM_CBCC, _MM_PERM_CBCD,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB, _MM_PERM_CBDC, _MM_PERM_CBDD,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB, _MM_PERM_CCAC, _MM_PERM_CCAD,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB, _MM_PERM_CCBC, _MM_PERM_CCBD,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB, _MM_PERM_CCCC, _MM_PERM_CCCD,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB, _MM_PERM_CCDC, _MM_PERM_CCDD,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB, _MM_PERM_CDAC, _MM_PERM_CDAD,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB, _MM_PERM_CDBC, _MM_PERM_CDBD,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB, _MM_PERM_CDCC, _MM_PERM_CDCD,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB, _MM_PERM_CDDC, _MM_PERM_CDDD,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB, _MM_PERM_DAAC, _MM_PERM_DAAD,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB, _MM_PERM_DABC, _MM_PERM_DABD,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB, _MM_PERM_DACC, _MM_PERM_DACD,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB, _MM_PERM_DADC, _MM_PERM_DADD,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB, _MM_PERM_DBAC, _MM_PERM_DBAD,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB, _MM_PERM_DBBC, _MM_PERM_DBBD,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB, _MM_PERM_DBCC, _MM_PERM_DBCD,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB, _MM_PERM_DBDC, _MM_PERM_DBDD,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB, _MM_PERM_DCAC, _MM_PERM_DCAD,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB, _MM_PERM_DCBC, _MM_PERM_DCBD,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB, _MM_PERM_DCCC, _MM_PERM_DCCD,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB, _MM_PERM_DCDC, _MM_PERM_DCDD,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB, _MM_PERM_DDAC, _MM_PERM_DDAD,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB, _MM_PERM_DDBC, _MM_PERM_DDBD,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB, _MM_PERM_DDCC, _MM_PERM_DDCD,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB, _MM_PERM_DDDC, _MM_PERM_DDDD
} _MM_PERM_ENUM;
3835
3836#ifdef __OPTIMIZE__
3837extern __inline __m512i
3838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3839_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
3840{
3841  return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3842						  __mask,
3843						  (__v16si)
3844						  _mm512_undefined_si512 (),
3845						  (__mmask16) -1);
3846}
3847
3848extern __inline __m512i
3849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3850_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
3851			   _MM_PERM_ENUM __mask)
3852{
3853  return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3854						  __mask,
3855						  (__v16si) __W,
3856						  (__mmask16) __U);
3857}
3858
3859extern __inline __m512i
3860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3861_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
3862{
3863  return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3864						  __mask,
3865						  (__v16si)
3866						  _mm512_setzero_si512 (),
3867						  (__mmask16) __U);
3868}
3869
3870extern __inline __m512i
3871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3872_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
3873{
3874  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3875						   (__v8di) __B, __imm,
3876						   (__v8di)
3877						   _mm512_undefined_si512 (),
3878						   (__mmask8) -1);
3879}
3880
3881extern __inline __m512i
3882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3883_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
3884			   __m512i __B, const int __imm)
3885{
3886  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3887						   (__v8di) __B, __imm,
3888						   (__v8di) __W,
3889						   (__mmask8) __U);
3890}
3891
3892extern __inline __m512i
3893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3894_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
3895			    const int __imm)
3896{
3897  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3898						   (__v8di) __B, __imm,
3899						   (__v8di)
3900						   _mm512_setzero_si512 (),
3901						   (__mmask8) __U);
3902}
3903
3904extern __inline __m512i
3905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3906_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
3907{
3908  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3909						   (__v16si) __B,
3910						   __imm,
3911						   (__v16si)
3912						   _mm512_undefined_si512 (),
3913						   (__mmask16) -1);
3914}
3915
3916extern __inline __m512i
3917__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3918_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
3919			   __m512i __B, const int __imm)
3920{
3921  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3922						   (__v16si) __B,
3923						   __imm,
3924						   (__v16si) __W,
3925						   (__mmask16) __U);
3926}
3927
3928extern __inline __m512i
3929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3930_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
3931			    const int __imm)
3932{
3933  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3934						   (__v16si) __B,
3935						   __imm,
3936						   (__v16si)
3937						   _mm512_setzero_si512 (),
3938						   (__mmask16) __U);
3939}
3940
3941extern __inline __m512d
3942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3943_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
3944{
3945  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3946						   (__v8df) __B, __imm,
3947						   (__v8df)
3948						   _mm512_undefined_pd (),
3949						   (__mmask8) -1);
3950}
3951
3952extern __inline __m512d
3953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3954_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
3955			   __m512d __B, const int __imm)
3956{
3957  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3958						   (__v8df) __B, __imm,
3959						   (__v8df) __W,
3960						   (__mmask8) __U);
3961}
3962
3963extern __inline __m512d
3964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3965_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
3966			    const int __imm)
3967{
3968  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3969						   (__v8df) __B, __imm,
3970						   (__v8df)
3971						   _mm512_setzero_pd (),
3972						   (__mmask8) __U);
3973}
3974
3975extern __inline __m512
3976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3977_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
3978{
3979  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3980						  (__v16sf) __B, __imm,
3981						  (__v16sf)
3982						  _mm512_undefined_ps (),
3983						  (__mmask16) -1);
3984}
3985
3986extern __inline __m512
3987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3988_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
3989			   __m512 __B, const int __imm)
3990{
3991  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3992						  (__v16sf) __B, __imm,
3993						  (__v16sf) __W,
3994						  (__mmask16) __U);
3995}
3996
3997extern __inline __m512
3998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3999_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4000			    const int __imm)
4001{
4002  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4003						  (__v16sf) __B, __imm,
4004						  (__v16sf)
4005						  _mm512_setzero_ps (),
4006						  (__mmask16) __U);
4007}
4008
4009#else
/* Macro form used when __OPTIMIZE__ is not defined: pshufd512 builtin on
   V with immediate IMM, undefined source vector, all-ones mask.  */
#define _mm512_shuffle_epi32(V, IMM)					\
  ((__m512i) __builtin_ia32_pshufd512_mask				\
     ((__v16si) (__m512i) (V), (int) (IMM),				\
      (__v16si) (__m512i) _mm512_undefined_si512 (),			\
      (__mmask16) -1))
4014
/* Macro form used when __OPTIMIZE__ is not defined: pshufd512 builtin on
   V with immediate IMM, SRC as the source vector and K as the mask.  */
#define _mm512_mask_shuffle_epi32(SRC, K, V, IMM)			\
  ((__m512i) __builtin_ia32_pshufd512_mask				\
     ((__v16si) (__m512i) (V), (int) (IMM),				\
      (__v16si) (__m512i) (SRC),					\
      (__mmask16) (K)))
4019
/* Macro form used when __OPTIMIZE__ is not defined: pshufd512 builtin on
   V with immediate IMM, all-zero source vector and K as the mask.  */
#define _mm512_maskz_shuffle_epi32(K, V, IMM)				\
  ((__m512i) __builtin_ia32_pshufd512_mask				\
     ((__v16si) (__m512i) (V), (int) (IMM),				\
      (__v16si) (__m512i) _mm512_setzero_si512 (),			\
      (__mmask16) (K)))
4024
/* Macro form used when __OPTIMIZE__ is not defined: shuf_i64x2 builtin on
   V1 and V2 with immediate IMM, undefined source vector, all-ones mask.  */
#define _mm512_shuffle_i64x2(V1, V2, IMM)				\
  ((__m512i) __builtin_ia32_shuf_i64x2_mask				\
     ((__v8di) (__m512i) (V1), (__v8di) (__m512i) (V2), (int) (IMM),	\
      (__v8di) (__m512i) _mm512_undefined_si512 (),			\
      (__mmask8) -1))
4030
/* Macro form used when __OPTIMIZE__ is not defined: shuf_i64x2 builtin on
   V1 and V2 with immediate IMM, SRC as the source vector, K as the mask.  */
#define _mm512_mask_shuffle_i64x2(SRC, K, V1, V2, IMM)			\
  ((__m512i) __builtin_ia32_shuf_i64x2_mask				\
     ((__v8di) (__m512i) (V1), (__v8di) (__m512i) (V2), (int) (IMM),	\
      (__v8di) (__m512i) (SRC),						\
      (__mmask8) (K)))
4036
/* Macro form used when __OPTIMIZE__ is not defined: shuf_i64x2 builtin on
   V1 and V2 with immediate IMM, all-zero source vector, K as the mask.  */
#define _mm512_maskz_shuffle_i64x2(K, V1, V2, IMM)			\
  ((__m512i) __builtin_ia32_shuf_i64x2_mask				\
     ((__v8di) (__m512i) (V1), (__v8di) (__m512i) (V2), (int) (IMM),	\
      (__v8di) (__m512i) _mm512_setzero_si512 (),			\
      (__mmask8) (K)))
4042
/* Macro form used when __OPTIMIZE__ is not defined: shuf_i32x4 builtin on
   V1 and V2 with immediate IMM, undefined source vector, all-ones mask.  */
#define _mm512_shuffle_i32x4(V1, V2, IMM)				\
  ((__m512i) __builtin_ia32_shuf_i32x4_mask				\
     ((__v16si) (__m512i) (V1), (__v16si) (__m512i) (V2), (int) (IMM),	\
      (__v16si) (__m512i) _mm512_undefined_si512 (),			\
      (__mmask16) -1))
4048
/* Macro form used when __OPTIMIZE__ is not defined: shuf_i32x4 builtin on
   V1 and V2 with immediate IMM, SRC as the source vector, K as the mask.  */
#define _mm512_mask_shuffle_i32x4(SRC, K, V1, V2, IMM)			\
  ((__m512i) __builtin_ia32_shuf_i32x4_mask				\
     ((__v16si) (__m512i) (V1), (__v16si) (__m512i) (V2), (int) (IMM),	\
      (__v16si) (__m512i) (SRC),					\
      (__mmask16) (K)))
4054
/* Macro form used when __OPTIMIZE__ is not defined: shuf_i32x4 builtin on
   V1 and V2 with immediate IMM, all-zero source vector, K as the mask.  */
#define _mm512_maskz_shuffle_i32x4(K, V1, V2, IMM)			\
  ((__m512i) __builtin_ia32_shuf_i32x4_mask				\
     ((__v16si) (__m512i) (V1), (__v16si) (__m512i) (V2), (int) (IMM),	\
      (__v16si) (__m512i) _mm512_setzero_si512 (),			\
      (__mmask16) (K)))
4060
/* Macro form used when __OPTIMIZE__ is not defined: shuf_f64x2 builtin on
   V1 and V2 with immediate IMM, undefined source vector, all-ones mask.  */
#define _mm512_shuffle_f64x2(V1, V2, IMM)				\
  ((__m512d) __builtin_ia32_shuf_f64x2_mask				\
     ((__v8df) (__m512d) (V1), (__v8df) (__m512d) (V2), (int) (IMM),	\
      (__v8df) (__m512d) _mm512_undefined_pd (),			\
      (__mmask8) -1))
4066
/* Macro form used when __OPTIMIZE__ is not defined: shuf_f64x2 builtin on
   V1 and V2 with immediate IMM, SRC as the source vector, K as the mask.  */
#define _mm512_mask_shuffle_f64x2(SRC, K, V1, V2, IMM)			\
  ((__m512d) __builtin_ia32_shuf_f64x2_mask				\
     ((__v8df) (__m512d) (V1), (__v8df) (__m512d) (V2), (int) (IMM),	\
      (__v8df) (__m512d) (SRC),						\
      (__mmask8) (K)))
4072
/* Macro form used when __OPTIMIZE__ is not defined: shuf_f64x2 builtin on
   V1 and V2 with immediate IMM, all-zero source vector, K as the mask.  */
#define _mm512_maskz_shuffle_f64x2(K, V1, V2, IMM)			\
  ((__m512d) __builtin_ia32_shuf_f64x2_mask				\
     ((__v8df) (__m512d) (V1), (__v8df) (__m512d) (V2), (int) (IMM),	\
      (__v8df) (__m512d) _mm512_setzero_pd (),				\
      (__mmask8) (K)))
4078
/* Macro form used when __OPTIMIZE__ is not defined: shuf_f32x4 builtin on
   V1 and V2 with immediate IMM, undefined source vector, all-ones mask.  */
#define _mm512_shuffle_f32x4(V1, V2, IMM)				\
  ((__m512) __builtin_ia32_shuf_f32x4_mask				\
     ((__v16sf) (__m512) (V1), (__v16sf) (__m512) (V2), (int) (IMM),	\
      (__v16sf) (__m512) _mm512_undefined_ps (),			\
      (__mmask16) -1))
4084
/* Macro form used when __OPTIMIZE__ is not defined: shuf_f32x4 builtin on
   V1 and V2 with immediate IMM, SRC as the source vector, K as the mask.  */
#define _mm512_mask_shuffle_f32x4(SRC, K, V1, V2, IMM)			\
  ((__m512) __builtin_ia32_shuf_f32x4_mask				\
     ((__v16sf) (__m512) (V1), (__v16sf) (__m512) (V2), (int) (IMM),	\
      (__v16sf) (__m512) (SRC),						\
      (__mmask16) (K)))
4090
/* Macro form used when __OPTIMIZE__ is not defined: shuf_f32x4 builtin on
   V1 and V2 with immediate IMM, all-zero source vector, K as the mask.  */
#define _mm512_maskz_shuffle_f32x4(K, V1, V2, IMM)			\
  ((__m512) __builtin_ia32_shuf_f32x4_mask				\
     ((__v16sf) (__m512) (V1), (__v16sf) (__m512) (V2), (int) (IMM),	\
      (__v16sf) (__m512) _mm512_setzero_ps (),				\
      (__mmask16) (K)))
4096#endif
4097
4098extern __inline __m512i
4099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4100_mm512_rolv_epi32 (__m512i __A, __m512i __B)
4101{
4102  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4103						  (__v16si) __B,
4104						  (__v16si)
4105						  _mm512_undefined_si512 (),
4106						  (__mmask16) -1);
4107}
4108
4109extern __inline __m512i
4110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4111_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4112{
4113  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4114						  (__v16si) __B,
4115						  (__v16si) __W,
4116						  (__mmask16) __U);
4117}
4118
4119extern __inline __m512i
4120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4121_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4122{
4123  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4124						  (__v16si) __B,
4125						  (__v16si)
4126						  _mm512_setzero_si512 (),
4127						  (__mmask16) __U);
4128}
4129
4130extern __inline __m512i
4131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4132_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4133{
4134  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4135						  (__v16si) __B,
4136						  (__v16si)
4137						  _mm512_undefined_si512 (),
4138						  (__mmask16) -1);
4139}
4140
4141extern __inline __m512i
4142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4143_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4144{
4145  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4146						  (__v16si) __B,
4147						  (__v16si) __W,
4148						  (__mmask16) __U);
4149}
4150
4151extern __inline __m512i
4152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4153_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4154{
4155  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4156						  (__v16si) __B,
4157						  (__v16si)
4158						  _mm512_setzero_si512 (),
4159						  (__mmask16) __U);
4160}
4161
4162extern __inline __m512i
4163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4164_mm512_rolv_epi64 (__m512i __A, __m512i __B)
4165{
4166  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4167						  (__v8di) __B,
4168						  (__v8di)
4169						  _mm512_undefined_si512 (),
4170						  (__mmask8) -1);
4171}
4172
4173extern __inline __m512i
4174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4175_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4176{
4177  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4178						  (__v8di) __B,
4179						  (__v8di) __W,
4180						  (__mmask8) __U);
4181}
4182
4183extern __inline __m512i
4184__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4185_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4186{
4187  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4188						  (__v8di) __B,
4189						  (__v8di)
4190						  _mm512_setzero_si512 (),
4191						  (__mmask8) __U);
4192}
4193
4194extern __inline __m512i
4195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4196_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4197{
4198  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4199						  (__v8di) __B,
4200						  (__v8di)
4201						  _mm512_undefined_si512 (),
4202						  (__mmask8) -1);
4203}
4204
4205extern __inline __m512i
4206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4207_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4208{
4209  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4210						  (__v8di) __B,
4211						  (__v8di) __W,
4212						  (__mmask8) __U);
4213}
4214
4215extern __inline __m512i
4216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4217_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4218{
4219  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4220						  (__v8di) __B,
4221						  (__v8di)
4222						  _mm512_setzero_si512 (),
4223						  (__mmask8) __U);
4224}
4225
4226#ifdef __OPTIMIZE__
4227extern __inline __m256i
4228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4229_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4230{
4231  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4232						     (__v8si)
4233						     _mm256_undefined_si256 (),
4234						     (__mmask8) -1, __R);
4235}
4236
4237extern __inline __m256i
4238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4239_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4240				const int __R)
4241{
4242  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4243						     (__v8si) __W,
4244						     (__mmask8) __U, __R);
4245}
4246
4247extern __inline __m256i
4248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4249_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4250{
4251  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4252						     (__v8si)
4253						     _mm256_setzero_si256 (),
4254						     (__mmask8) __U, __R);
4255}
4256
4257extern __inline __m256i
4258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4259_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4260{
4261  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4262						      (__v8si)
4263						      _mm256_undefined_si256 (),
4264						      (__mmask8) -1, __R);
4265}
4266
4267extern __inline __m256i
4268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4269_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4270				const int __R)
4271{
4272  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4273						      (__v8si) __W,
4274						      (__mmask8) __U, __R);
4275}
4276
4277extern __inline __m256i
4278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4279_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4280{
4281  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4282						      (__v8si)
4283						      _mm256_setzero_si256 (),
4284						      (__mmask8) __U, __R);
4285}
4286#else
/* Macro form used when __OPTIMIZE__ is not defined: cvttpd2dq512 builtin
   on V with an undefined source vector, mask -1 and R as last operand.  */
#define _mm512_cvtt_roundpd_epi32(V, R)					\
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask				\
     ((V), (__v8si) _mm256_undefined_si256 (), -1, (R)))
4289
/* Macro form used when __OPTIMIZE__ is not defined: cvttpd2dq512 builtin
   on V with SRC as the source vector, K as the mask, R as last operand.  */
#define _mm512_mask_cvtt_roundpd_epi32(SRC, K, V, R)			\
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask				\
     ((V), (__v8si) (SRC), (K), (R)))
4292
/* Macro form used when __OPTIMIZE__ is not defined: cvttpd2dq512 builtin
   on V with an all-zero source vector, K as the mask, R as last operand.  */
#define _mm512_maskz_cvtt_roundpd_epi32(K, V, R)			\
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask				\
     ((V), (__v8si) _mm256_setzero_si256 (), (K), (R)))
4295
/* Macro form used when __OPTIMIZE__ is not defined: cvttpd2udq512 builtin
   on V with an undefined source vector, mask -1 and R as last operand.  */
#define _mm512_cvtt_roundpd_epu32(V, R)					\
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask				\
     ((V), (__v8si) _mm256_undefined_si256 (), -1, (R)))
4298
/* Macro form used when __OPTIMIZE__ is not defined: cvttpd2udq512 builtin
   on V with SRC as the source vector, K as the mask, R as last operand.  */
#define _mm512_mask_cvtt_roundpd_epu32(SRC, K, V, R)			\
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask				\
     ((V), (__v8si) (SRC), (K), (R)))
4301
/* Macro form used when __OPTIMIZE__ is not defined: cvttpd2udq512 builtin
   on V with an all-zero source vector, K as the mask, R as last operand.  */
#define _mm512_maskz_cvtt_roundpd_epu32(K, V, R)			\
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask				\
     ((V), (__v8si) _mm256_setzero_si256 (), (K), (R)))
4304#endif
4305
4306#ifdef __OPTIMIZE__
4307extern __inline __m256i
4308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4309_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4310{
4311  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4312						    (__v8si)
4313						    _mm256_undefined_si256 (),
4314						    (__mmask8) -1, __R);
4315}
4316
4317extern __inline __m256i
4318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4319_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4320			       const int __R)
4321{
4322  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4323						    (__v8si) __W,
4324						    (__mmask8) __U, __R);
4325}
4326
4327extern __inline __m256i
4328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4329_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4330{
4331  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4332						    (__v8si)
4333						    _mm256_setzero_si256 (),
4334						    (__mmask8) __U, __R);
4335}
4336
4337extern __inline __m256i
4338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4339_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4340{
4341  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4342						     (__v8si)
4343						     _mm256_undefined_si256 (),
4344						     (__mmask8) -1, __R);
4345}
4346
4347extern __inline __m256i
4348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4349_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4350			       const int __R)
4351{
4352  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4353						     (__v8si) __W,
4354						     (__mmask8) __U, __R);
4355}
4356
4357extern __inline __m256i
4358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4359_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4360{
4361  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4362						     (__v8si)
4363						     _mm256_setzero_si256 (),
4364						     (__mmask8) __U, __R);
4365}
4366#else
/* Macro form used when __OPTIMIZE__ is not defined: cvtpd2dq512 builtin
   on V with an undefined source vector, mask -1 and R as last operand.  */
#define _mm512_cvt_roundpd_epi32(V, R)					\
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask				\
     ((V), (__v8si) _mm256_undefined_si256 (), -1, (R)))
4369
/* Macro form used when __OPTIMIZE__ is not defined: cvtpd2dq512 builtin
   on V with SRC as the source vector, K as the mask, R as last operand.  */
#define _mm512_mask_cvt_roundpd_epi32(SRC, K, V, R)			\
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask				\
     ((V), (__v8si) (SRC), (K), (R)))
4372
/* Macro form used when __OPTIMIZE__ is not defined: cvtpd2dq512 builtin
   on V with an all-zero source vector, K as the mask, R as last operand.  */
#define _mm512_maskz_cvt_roundpd_epi32(K, V, R)				\
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask				\
     ((V), (__v8si) _mm256_setzero_si256 (), (K), (R)))
4375
/* Macro form used when __OPTIMIZE__ is not defined: cvtpd2udq512 builtin
   on V with an undefined source vector, mask -1 and R as last operand.  */
#define _mm512_cvt_roundpd_epu32(V, R)					\
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask				\
     ((V), (__v8si) _mm256_undefined_si256 (), -1, (R)))
4378
/* Macro form used when __OPTIMIZE__ is not defined: cvtpd2udq512 builtin
   on V with SRC as the source vector, K as the mask, R as last operand.  */
#define _mm512_mask_cvt_roundpd_epu32(SRC, K, V, R)			\
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask				\
     ((V), (__v8si) (SRC), (K), (R)))
4381
/* Macro form used when __OPTIMIZE__ is not defined: cvtpd2udq512 builtin
   on V with an all-zero source vector, K as the mask, R as last operand.  */
#define _mm512_maskz_cvt_roundpd_epu32(K, V, R)				\
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask				\
     ((V), (__v8si) _mm256_setzero_si256 (), (K), (R)))
4384#endif
4385
4386#ifdef __OPTIMIZE__
4387extern __inline __m512i
4388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4389_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4390{
4391  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4392						     (__v16si)
4393						     _mm512_undefined_si512 (),
4394						     (__mmask16) -1, __R);
4395}
4396
4397extern __inline __m512i
4398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4399_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4400				const int __R)
4401{
4402  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4403						     (__v16si) __W,
4404						     (__mmask16) __U, __R);
4405}
4406
4407extern __inline __m512i
4408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4409_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4410{
4411  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4412						     (__v16si)
4413						     _mm512_setzero_si512 (),
4414						     (__mmask16) __U, __R);
4415}
4416
4417extern __inline __m512i
4418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4419_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4420{
4421  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4422						      (__v16si)
4423						      _mm512_undefined_si512 (),
4424						      (__mmask16) -1, __R);
4425}
4426
4427extern __inline __m512i
4428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4429_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4430				const int __R)
4431{
4432  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4433						      (__v16si) __W,
4434						      (__mmask16) __U, __R);
4435}
4436
4437extern __inline __m512i
4438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4439_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4440{
4441  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4442						      (__v16si)
4443						      _mm512_setzero_si512 (),
4444						      (__mmask16) __U, __R);
4445}
4446#else
/* Macro form used when __OPTIMIZE__ is not defined: cvttps2dq512 builtin
   on V with an undefined source vector, mask -1 and R as last operand.  */
#define _mm512_cvtt_roundps_epi32(V, R)					\
  ((__m512i) __builtin_ia32_cvttps2dq512_mask				\
     ((V), (__v16si) _mm512_undefined_si512 (), -1, (R)))
4449
/* Macro form used when __OPTIMIZE__ is not defined: cvttps2dq512 builtin
   on V with SRC as the source vector, K as the mask, R as last operand.  */
#define _mm512_mask_cvtt_roundps_epi32(SRC, K, V, R)			\
  ((__m512i) __builtin_ia32_cvttps2dq512_mask				\
     ((V), (__v16si) (SRC), (K), (R)))
4452
/* Macro form used when __OPTIMIZE__ is not defined: cvttps2dq512 builtin
   on V with an all-zero source vector, K as the mask, R as last operand.  */
#define _mm512_maskz_cvtt_roundps_epi32(K, V, R)			\
  ((__m512i) __builtin_ia32_cvttps2dq512_mask				\
     ((V), (__v16si) _mm512_setzero_si512 (), (K), (R)))
4455
/* Macro form used when __OPTIMIZE__ is not defined: cvttps2udq512 builtin
   on V with an undefined source vector, mask -1 and R as last operand.  */
#define _mm512_cvtt_roundps_epu32(V, R)					\
  ((__m512i) __builtin_ia32_cvttps2udq512_mask				\
     ((V), (__v16si) _mm512_undefined_si512 (), -1, (R)))
4458
/* Macro form used when __OPTIMIZE__ is not defined: cvttps2udq512 builtin
   on V with SRC as the source vector, K as the mask, R as last operand.  */
#define _mm512_mask_cvtt_roundps_epu32(SRC, K, V, R)			\
  ((__m512i) __builtin_ia32_cvttps2udq512_mask				\
     ((V), (__v16si) (SRC), (K), (R)))
4461
4462#define _mm512_maskz_cvtt_roundps_epu32(U, A, B)     \
4463    ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4464#endif
4465
4466#ifdef __OPTIMIZE__
4467extern __inline __m512i
4468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4469_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4470{
4471  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4472						    (__v16si)
4473						    _mm512_undefined_si512 (),
4474						    (__mmask16) -1, __R);
4475}
4476
4477extern __inline __m512i
4478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4479_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4480			       const int __R)
4481{
4482  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4483						    (__v16si) __W,
4484						    (__mmask16) __U, __R);
4485}
4486
4487extern __inline __m512i
4488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4489_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4490{
4491  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4492						    (__v16si)
4493						    _mm512_setzero_si512 (),
4494						    (__mmask16) __U, __R);
4495}
4496
4497extern __inline __m512i
4498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4499_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4500{
4501  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4502						     (__v16si)
4503						     _mm512_undefined_si512 (),
4504						     (__mmask16) -1, __R);
4505}
4506
4507extern __inline __m512i
4508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4509_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4510			       const int __R)
4511{
4512  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4513						     (__v16si) __W,
4514						     (__mmask16) __U, __R);
4515}
4516
4517extern __inline __m512i
4518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4519_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4520{
4521  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4522						     (__v16si)
4523						     _mm512_setzero_si512 (),
4524						     (__mmask16) __U, __R);
4525}
4526#else
/* Macro forms (the #else branch) of the cvt_roundps_epi32 and
   cvt_roundps_epu32 wrappers.  V is the input vector, R the last
   builtin operand, S the second operand and K the mask.  */
#define _mm512_cvt_roundps_epi32(V, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask(V, \
     (__v16si)_mm512_undefined_si512 (), -1, R))

#define _mm512_mask_cvt_roundps_epi32(S, K, V, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask(V, (__v16si)(S), K, R))

#define _mm512_maskz_cvt_roundps_epi32(K, V, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask(V, \
     (__v16si)_mm512_setzero_si512 (), K, R))

#define _mm512_cvt_roundps_epu32(V, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask(V, \
     (__v16si)_mm512_undefined_si512 (), -1, R))

#define _mm512_mask_cvt_roundps_epu32(S, K, V, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask(V, (__v16si)(S), K, R))

#define _mm512_maskz_cvt_roundps_epu32(K, V, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask(V, \
     (__v16si)_mm512_setzero_si512 (), K, R))
4544#endif
4545
4546extern __inline __m128d
4547__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4548_mm_cvtu32_sd (__m128d __A, unsigned __B)
4549{
4550  return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4551}
4552
4553#ifdef __x86_64__
4554#ifdef __OPTIMIZE__
4555extern __inline __m128d
4556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4557_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4558{
4559  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4560}
4561
4562extern __inline __m128d
4563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4564_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4565{
4566  return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4567}
4568
4569extern __inline __m128d
4570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4571_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4572{
4573  return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4574}
4575#else
/* Macro forms of _mm_cvt_roundu64_sd, _mm_cvt_roundi64_sd and
   _mm_cvt_roundsi64_sd, used when __OPTIMIZE__ is not defined.  Each
   expansion is wrapped in parentheses so that the (__m128d) cast applies
   to the builtin call even when the macro invocation is followed by a
   postfix operator such as a subscript.  */
#define _mm_cvt_roundu64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtusi2sd64(A, B, C))

#define _mm_cvt_roundi64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))

#define _mm_cvt_roundsi64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))
4584#endif
4585
4586#endif
4587
4588#ifdef __OPTIMIZE__
4589extern __inline __m128
4590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4591_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4592{
4593  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4594}
4595
4596extern __inline __m128
4597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4598_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4599{
4600  return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4601}
4602
4603extern __inline __m128
4604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4605_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4606{
4607  return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4608}
4609#else
/* Macro forms of _mm_cvt_roundu32_ss, _mm_cvt_roundi32_ss and
   _mm_cvt_roundsi32_ss, used when __OPTIMIZE__ is not defined.  Each
   expansion is wrapped in parentheses so that the (__m128) cast applies
   to the builtin call even when the macro invocation is followed by a
   postfix operator such as a subscript.  */
#define _mm_cvt_roundu32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtusi2ss32(A, B, C))

#define _mm_cvt_roundi32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))

#define _mm_cvt_roundsi32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))
4618#endif
4619
4620#ifdef __x86_64__
4621#ifdef __OPTIMIZE__
4622extern __inline __m128
4623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4624_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4625{
4626  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4627}
4628
4629extern __inline __m128
4630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4631_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4632{
4633  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4634}
4635
4636extern __inline __m128
4637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4638_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4639{
4640  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4641}
4642#else
/* Macro forms of _mm_cvt_roundu64_ss, _mm_cvt_roundi64_ss and
   _mm_cvt_roundsi64_ss, used when __OPTIMIZE__ is not defined.  Each
   expansion is wrapped in parentheses so that the (__m128) cast applies
   to the builtin call even when the macro invocation is followed by a
   postfix operator such as a subscript.  */
#define _mm_cvt_roundu64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtusi2ss64(A, B, C))

#define _mm_cvt_roundi64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))

#define _mm_cvt_roundsi64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))
4651#endif
4652
4653#endif
4654
4655extern __inline __m128i
4656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4657_mm512_cvtepi32_epi8 (__m512i __A)
4658{
4659  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4660						  (__v16qi)
4661						  _mm_undefined_si128 (),
4662						  (__mmask16) -1);
4663}
4664
4665extern __inline void
4666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4667_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4668{
4669  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4670}
4671
4672extern __inline __m128i
4673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4674_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4675{
4676  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4677						  (__v16qi) __O, __M);
4678}
4679
4680extern __inline __m128i
4681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4682_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
4683{
4684  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4685						  (__v16qi)
4686						  _mm_setzero_si128 (),
4687						  __M);
4688}
4689
4690extern __inline __m128i
4691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4692_mm512_cvtsepi32_epi8 (__m512i __A)
4693{
4694  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4695						   (__v16qi)
4696						   _mm_undefined_si128 (),
4697						   (__mmask16) -1);
4698}
4699
4700extern __inline void
4701__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4702_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4703{
4704  __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4705}
4706
4707extern __inline __m128i
4708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4709_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4710{
4711  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4712						   (__v16qi) __O, __M);
4713}
4714
4715extern __inline __m128i
4716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4717_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
4718{
4719  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4720						   (__v16qi)
4721						   _mm_setzero_si128 (),
4722						   __M);
4723}
4724
4725extern __inline __m128i
4726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4727_mm512_cvtusepi32_epi8 (__m512i __A)
4728{
4729  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4730						    (__v16qi)
4731						    _mm_undefined_si128 (),
4732						    (__mmask16) -1);
4733}
4734
4735extern __inline void
4736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4737_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4738{
4739  __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4740}
4741
4742extern __inline __m128i
4743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4744_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4745{
4746  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4747						    (__v16qi) __O,
4748						    __M);
4749}
4750
4751extern __inline __m128i
4752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4753_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
4754{
4755  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4756						    (__v16qi)
4757						    _mm_setzero_si128 (),
4758						    __M);
4759}
4760
4761extern __inline __m256i
4762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4763_mm512_cvtepi32_epi16 (__m512i __A)
4764{
4765  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4766						  (__v16hi)
4767						  _mm256_undefined_si256 (),
4768						  (__mmask16) -1);
4769}
4770
4771extern __inline void
4772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4773_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
4774{
4775  __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
4776}
4777
4778extern __inline __m256i
4779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4780_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4781{
4782  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4783						  (__v16hi) __O, __M);
4784}
4785
4786extern __inline __m256i
4787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4788_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
4789{
4790  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4791						  (__v16hi)
4792						  _mm256_setzero_si256 (),
4793						  __M);
4794}
4795
4796extern __inline __m256i
4797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4798_mm512_cvtsepi32_epi16 (__m512i __A)
4799{
4800  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4801						   (__v16hi)
4802						   _mm256_undefined_si256 (),
4803						   (__mmask16) -1);
4804}
4805
4806extern __inline void
4807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4808_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4809{
4810  __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4811}
4812
4813extern __inline __m256i
4814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4815_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4816{
4817  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4818						   (__v16hi) __O, __M);
4819}
4820
4821extern __inline __m256i
4822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4823_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
4824{
4825  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4826						   (__v16hi)
4827						   _mm256_setzero_si256 (),
4828						   __M);
4829}
4830
4831extern __inline __m256i
4832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4833_mm512_cvtusepi32_epi16 (__m512i __A)
4834{
4835  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4836						    (__v16hi)
4837						    _mm256_undefined_si256 (),
4838						    (__mmask16) -1);
4839}
4840
4841extern __inline void
4842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4843_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4844{
4845  __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4846}
4847
4848extern __inline __m256i
4849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4850_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4851{
4852  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4853						    (__v16hi) __O,
4854						    __M);
4855}
4856
4857extern __inline __m256i
4858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4859_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
4860{
4861  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4862						    (__v16hi)
4863						    _mm256_setzero_si256 (),
4864						    __M);
4865}
4866
4867extern __inline __m256i
4868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4869_mm512_cvtepi64_epi32 (__m512i __A)
4870{
4871  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4872						  (__v8si)
4873						  _mm256_undefined_si256 (),
4874						  (__mmask8) -1);
4875}
4876
4877extern __inline void
4878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4879_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4880{
4881  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4882}
4883
4884extern __inline __m256i
4885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4886_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4887{
4888  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4889						  (__v8si) __O, __M);
4890}
4891
4892extern __inline __m256i
4893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4894_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
4895{
4896  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4897						  (__v8si)
4898						  _mm256_setzero_si256 (),
4899						  __M);
4900}
4901
4902extern __inline __m256i
4903__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4904_mm512_cvtsepi64_epi32 (__m512i __A)
4905{
4906  __v8si __O;
4907  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4908						   (__v8si)
4909						   _mm256_undefined_si256 (),
4910						   (__mmask8) -1);
4911}
4912
4913extern __inline void
4914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4915_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
4916{
4917  __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4918}
4919
4920extern __inline __m256i
4921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4922_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4923{
4924  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4925						   (__v8si) __O, __M);
4926}
4927
4928extern __inline __m256i
4929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4930_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
4931{
4932  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4933						   (__v8si)
4934						   _mm256_setzero_si256 (),
4935						   __M);
4936}
4937
4938extern __inline __m256i
4939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4940_mm512_cvtusepi64_epi32 (__m512i __A)
4941{
4942  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4943						    (__v8si)
4944						    _mm256_undefined_si256 (),
4945						    (__mmask8) -1);
4946}
4947
4948extern __inline void
4949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4950_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4951{
4952  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
4953}
4954
4955extern __inline __m256i
4956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4957_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4958{
4959  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4960						    (__v8si) __O, __M);
4961}
4962
4963extern __inline __m256i
4964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4965_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
4966{
4967  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4968						    (__v8si)
4969						    _mm256_setzero_si256 (),
4970						    __M);
4971}
4972
4973extern __inline __m128i
4974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4975_mm512_cvtepi64_epi16 (__m512i __A)
4976{
4977  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4978						  (__v8hi)
4979						  _mm_undefined_si128 (),
4980						  (__mmask8) -1);
4981}
4982
4983extern __inline void
4984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4985_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
4986{
4987  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
4988}
4989
4990extern __inline __m128i
4991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4992_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
4993{
4994  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4995						  (__v8hi) __O, __M);
4996}
4997
4998extern __inline __m128i
4999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5000_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5001{
5002  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5003						  (__v8hi)
5004						  _mm_setzero_si128 (),
5005						  __M);
5006}
5007
5008extern __inline __m128i
5009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5010_mm512_cvtsepi64_epi16 (__m512i __A)
5011{
5012  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5013						   (__v8hi)
5014						   _mm_undefined_si128 (),
5015						   (__mmask8) -1);
5016}
5017
5018extern __inline void
5019__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5020_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5021{
5022  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5023}
5024
5025extern __inline __m128i
5026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5027_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5028{
5029  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5030						   (__v8hi) __O, __M);
5031}
5032
5033extern __inline __m128i
5034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5035_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5036{
5037  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5038						   (__v8hi)
5039						   _mm_setzero_si128 (),
5040						   __M);
5041}
5042
5043extern __inline __m128i
5044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5045_mm512_cvtusepi64_epi16 (__m512i __A)
5046{
5047  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5048						    (__v8hi)
5049						    _mm_undefined_si128 (),
5050						    (__mmask8) -1);
5051}
5052
5053extern __inline void
5054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5055_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5056{
5057  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5058}
5059
5060extern __inline __m128i
5061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5062_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5063{
5064  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5065						    (__v8hi) __O, __M);
5066}
5067
5068extern __inline __m128i
5069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5070_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5071{
5072  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5073						    (__v8hi)
5074						    _mm_setzero_si128 (),
5075						    __M);
5076}
5077
5078extern __inline __m128i
5079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5080_mm512_cvtepi64_epi8 (__m512i __A)
5081{
5082  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5083						  (__v16qi)
5084						  _mm_undefined_si128 (),
5085						  (__mmask8) -1);
5086}
5087
5088extern __inline void
5089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5090_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5091{
5092  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5093}
5094
5095extern __inline __m128i
5096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5097_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5098{
5099  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5100						  (__v16qi) __O, __M);
5101}
5102
5103extern __inline __m128i
5104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5105_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5106{
5107  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5108						  (__v16qi)
5109						  _mm_setzero_si128 (),
5110						  __M);
5111}
5112
5113extern __inline __m128i
5114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5115_mm512_cvtsepi64_epi8 (__m512i __A)
5116{
5117  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5118						   (__v16qi)
5119						   _mm_undefined_si128 (),
5120						   (__mmask8) -1);
5121}
5122
5123extern __inline void
5124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5125_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5126{
5127  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5128}
5129
5130extern __inline __m128i
5131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5132_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5133{
5134  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5135						   (__v16qi) __O, __M);
5136}
5137
5138extern __inline __m128i
5139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5140_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5141{
5142  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5143						   (__v16qi)
5144						   _mm_setzero_si128 (),
5145						   __M);
5146}
5147
5148extern __inline __m128i
5149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5150_mm512_cvtusepi64_epi8 (__m512i __A)
5151{
5152  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5153						    (__v16qi)
5154						    _mm_undefined_si128 (),
5155						    (__mmask8) -1);
5156}
5157
5158extern __inline void
5159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5160_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5161{
5162  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5163}
5164
5165extern __inline __m128i
5166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5167_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5168{
5169  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5170						    (__v16qi) __O,
5171						    __M);
5172}
5173
5174extern __inline __m128i
5175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5176_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5177{
5178  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5179						    (__v16qi)
5180						    _mm_setzero_si128 (),
5181						    __M);
5182}
5183
5184extern __inline __m512d
5185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5186_mm512_cvtepi32_pd (__m256i __A)
5187{
5188  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5189						    (__v8df)
5190						    _mm512_undefined_pd (),
5191						    (__mmask8) -1);
5192}
5193
5194extern __inline __m512d
5195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5196_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5197{
5198  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5199						    (__v8df) __W,
5200						    (__mmask8) __U);
5201}
5202
5203extern __inline __m512d
5204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5205_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5206{
5207  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5208						    (__v8df)
5209						    _mm512_setzero_pd (),
5210						    (__mmask8) __U);
5211}
5212
5213extern __inline __m512d
5214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5215_mm512_cvtepu32_pd (__m256i __A)
5216{
5217  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5218						     (__v8df)
5219						     _mm512_undefined_pd (),
5220						     (__mmask8) -1);
5221}
5222
5223extern __inline __m512d
5224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5225_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5226{
5227  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5228						     (__v8df) __W,
5229						     (__mmask8) __U);
5230}
5231
5232extern __inline __m512d
5233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5234_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5235{
5236  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5237						     (__v8df)
5238						     _mm512_setzero_pd (),
5239						     (__mmask8) __U);
5240}
5241
5242#ifdef __OPTIMIZE__
5243extern __inline __m512
5244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5245_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5246{
5247  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5248						   (__v16sf)
5249						   _mm512_undefined_ps (),
5250						   (__mmask16) -1, __R);
5251}
5252
5253extern __inline __m512
5254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5255_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5256			       const int __R)
5257{
5258  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5259						   (__v16sf) __W,
5260						   (__mmask16) __U, __R);
5261}
5262
5263extern __inline __m512
5264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5265_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5266{
5267  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5268						   (__v16sf)
5269						   _mm512_setzero_ps (),
5270						   (__mmask16) __U, __R);
5271}
5272
5273extern __inline __m512
5274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5275_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5276{
5277  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5278						    (__v16sf)
5279						    _mm512_undefined_ps (),
5280						    (__mmask16) -1, __R);
5281}
5282
5283extern __inline __m512
5284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5285_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5286			       const int __R)
5287{
5288  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5289						    (__v16sf) __W,
5290						    (__mmask16) __U, __R);
5291}
5292
5293extern __inline __m512
5294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5295_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5296{
5297  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5298						    (__v16sf)
5299						    _mm512_setzero_ps (),
5300						    (__mmask16) __U, __R);
5301}
5302
5303#else
/* Macro forms (the #else branch) of the cvt_roundepi32_ps and
   cvt_roundepu32_ps wrappers.  A is the input vector, B the last builtin
   operand, W the second operand and U the mask.  Each expansion is
   wrapped in parentheses so that the (__m512) cast applies to the
   builtin call even when the macro invocation is followed by a postfix
   operator such as a subscript.  */
#define _mm512_cvt_roundepi32_ps(A, B)        \
    ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B)   \
    ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, B)      \
    ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B))

#define _mm512_cvt_roundepu32_ps(A, B)        \
    ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B)   \
    ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, B)      \
    ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B))
5321#endif
5322
5323#ifdef __OPTIMIZE__
5324extern __inline __m256d
5325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5326_mm512_extractf64x4_pd (__m512d __A, const int __imm)
5327{
5328  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5329						     __imm,
5330						     (__v4df)
5331						     _mm256_undefined_pd (),
5332						     (__mmask8) -1);
5333}
5334
5335extern __inline __m256d
5336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5337_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5338			     const int __imm)
5339{
5340  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5341						     __imm,
5342						     (__v4df) __W,
5343						     (__mmask8) __U);
5344}
5345
5346extern __inline __m256d
5347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5348_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5349{
5350  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5351						     __imm,
5352						     (__v4df)
5353						     _mm256_setzero_pd (),
5354						     (__mmask8) __U);
5355}
5356
5357extern __inline __m128
5358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5359_mm512_extractf32x4_ps (__m512 __A, const int __imm)
5360{
5361  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5362						    __imm,
5363						    (__v4sf)
5364						    _mm_undefined_ps (),
5365						    (__mmask8) -1);
5366}
5367
5368extern __inline __m128
5369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5370_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5371			     const int __imm)
5372{
5373  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5374						    __imm,
5375						    (__v4sf) __W,
5376						    (__mmask8) __U);
5377}
5378
5379extern __inline __m128
5380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5381_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5382{
5383  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5384						    __imm,
5385						    (__v4sf)
5386						    _mm_setzero_ps (),
5387						    (__mmask8) __U);
5388}
5389
5390extern __inline __m256i
5391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5392_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5393{
5394  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5395						     __imm,
5396						     (__v4di)
5397						     _mm256_undefined_si256 (),
5398						     (__mmask8) -1);
5399}
5400
5401extern __inline __m256i
5402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5403_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5404				const int __imm)
5405{
5406  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5407						     __imm,
5408						     (__v4di) __W,
5409						     (__mmask8) __U);
5410}
5411
5412extern __inline __m256i
5413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5414_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5415{
5416  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5417						     __imm,
5418						     (__v4di)
5419						     _mm256_setzero_si256 (),
5420						     (__mmask8) __U);
5421}
5422
5423extern __inline __m128i
5424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5425_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5426{
5427  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5428						     __imm,
5429						     (__v4si)
5430						     _mm_undefined_si128 (),
5431						     (__mmask8) -1);
5432}
5433
5434extern __inline __m128i
5435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5436_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5437				const int __imm)
5438{
5439  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5440						     __imm,
5441						     (__v4si) __W,
5442						     (__mmask8) __U);
5443}
5444
5445extern __inline __m128i
5446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5447_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5448{
5449  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5450						     __imm,
5451						     (__v4si)
5452						     _mm_setzero_si128 (),
5453						     (__mmask8) __U);
5454}
5455#else
5456
/* Macro form used when __OPTIMIZE__ is not defined: undefined
   pass-through vector, all-ones mask.  */
#define _mm512_extractf64x4_pd(X, C)                                    \
  ((__m256d)                                                            \
   __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (X),            \
                                     (int) (C),                         \
                                     (__v4df) (__m256d)                 \
                                     _mm256_undefined_pd (),            \
                                     (__mmask8) -1))
5462
/* Macro form used when __OPTIMIZE__ is not defined: W is the
   pass-through vector, U the write mask.  */
#define _mm512_mask_extractf64x4_pd(W, U, X, C)                         \
  ((__m256d)                                                            \
   __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (X),            \
                                     (int) (C),                         \
                                     (__v4df) (__m256d) (W),            \
                                     (__mmask8) (U)))
5468
/* Macro form used when __OPTIMIZE__ is not defined: zero pass-through
   vector, U is the write mask.  */
#define _mm512_maskz_extractf64x4_pd(U, X, C)                           \
  ((__m256d)                                                            \
   __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (X),            \
                                     (int) (C),                         \
                                     (__v4df) (__m256d)                 \
                                     _mm256_setzero_pd (),              \
                                     (__mmask8) (U)))
5474
/* Macro form used when __OPTIMIZE__ is not defined: undefined
   pass-through vector, all-ones mask.  */
#define _mm512_extractf32x4_ps(X, C)                                    \
  ((__m128)                                                             \
   __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (X),            \
                                     (int) (C),                         \
                                     (__v4sf) (__m128)                  \
                                     _mm_undefined_ps (),               \
                                     (__mmask8) -1))
5480
/* Macro form used when __OPTIMIZE__ is not defined: W is the
   pass-through vector, U the write mask.  */
#define _mm512_mask_extractf32x4_ps(W, U, X, C)                         \
  ((__m128)                                                             \
   __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (X),            \
                                     (int) (C),                         \
                                     (__v4sf) (__m128) (W),             \
                                     (__mmask8) (U)))
5486
/* Macro form used when __OPTIMIZE__ is not defined: zero pass-through
   vector, U is the write mask.  */
#define _mm512_maskz_extractf32x4_ps(U, X, C)                           \
  ((__m128)                                                             \
   __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (X),            \
                                     (int) (C),                         \
                                     (__v4sf) (__m128)                  \
                                     _mm_setzero_ps (),                 \
                                     (__mmask8) (U)))
5492
/* Macro form used when __OPTIMIZE__ is not defined: undefined
   pass-through vector, all-ones mask.  */
#define _mm512_extracti64x4_epi64(X, C)                                 \
  ((__m256i)                                                            \
   __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (X),            \
                                     (int) (C),                         \
                                     (__v4di) (__m256i)                 \
                                     _mm256_undefined_si256 (),         \
                                     (__mmask8) -1))
5498
/* Macro form used when __OPTIMIZE__ is not defined: W is the
   pass-through vector, U the write mask.  */
#define _mm512_mask_extracti64x4_epi64(W, U, X, C)                      \
  ((__m256i)                                                            \
   __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (X),            \
                                     (int) (C),                         \
                                     (__v4di) (__m256i) (W),            \
                                     (__mmask8) (U)))
5504
/* Macro form used when __OPTIMIZE__ is not defined: zero pass-through
   vector, U is the write mask.  */
#define _mm512_maskz_extracti64x4_epi64(U, X, C)                        \
  ((__m256i)                                                            \
   __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (X),            \
                                     (int) (C),                         \
                                     (__v4di) (__m256i)                 \
                                     _mm256_setzero_si256 (),           \
                                     (__mmask8) (U)))
5510
/* Macro form used when __OPTIMIZE__ is not defined: undefined
   pass-through vector, all-ones mask.  */
#define _mm512_extracti32x4_epi32(X, C)                                 \
  ((__m128i)                                                            \
   __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (X),           \
                                     (int) (C),                         \
                                     (__v4si) (__m128i)                 \
                                     _mm_undefined_si128 (),            \
                                     (__mmask8) -1))
5516
/* Macro form used when __OPTIMIZE__ is not defined: W is the
   pass-through vector, U the write mask.  */
#define _mm512_mask_extracti32x4_epi32(W, U, X, C)                      \
  ((__m128i)                                                            \
   __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (X),           \
                                     (int) (C),                         \
                                     (__v4si) (__m128i) (W),            \
                                     (__mmask8) (U)))
5522
/* Macro form used when __OPTIMIZE__ is not defined: zero pass-through
   vector, U is the write mask.  */
#define _mm512_maskz_extracti32x4_epi32(U, X, C)                        \
  ((__m128i)                                                            \
   __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (X),           \
                                     (int) (C),                         \
                                     (__v4si) (__m128i)                 \
                                     _mm_setzero_si128 (),              \
                                     (__mmask8) (U)))
5528#endif
5529
5530#ifdef __OPTIMIZE__
5531extern __inline __m512i
5532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5533_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5534{
5535  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5536						    (__v4si) __B,
5537						    __imm,
5538						    (__v16si) __A, -1);
5539}
5540
5541extern __inline __m512
5542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5543_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5544{
5545  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5546						   (__v4sf) __B,
5547						   __imm,
5548						   (__v16sf) __A, -1);
5549}
5550
5551extern __inline __m512i
5552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5553_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5554{
5555  return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5556						    (__v4di) __B,
5557						    __imm,
5558						    (__v8di)
5559						    _mm512_undefined_si512 (),
5560						    (__mmask8) -1);
5561}
5562
5563extern __inline __m512i
5564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5565_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5566			 __m256i __B, const int __imm)
5567{
5568  return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5569						    (__v4di) __B,
5570						    __imm,
5571						    (__v8di) __W,
5572						    (__mmask8) __U);
5573}
5574
5575extern __inline __m512i
5576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5577_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5578			  const int __imm)
5579{
5580  return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5581						    (__v4di) __B,
5582						    __imm,
5583						    (__v8di)
5584						    _mm512_setzero_si512 (),
5585						    (__mmask8) __U);
5586}
5587
5588extern __inline __m512d
5589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5590_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5591{
5592  return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5593						    (__v4df) __B,
5594						    __imm,
5595						    (__v8df)
5596						    _mm512_undefined_pd (),
5597						    (__mmask8) -1);
5598}
5599
5600extern __inline __m512d
5601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5602_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5603			 __m256d __B, const int __imm)
5604{
5605  return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5606						    (__v4df) __B,
5607						    __imm,
5608						    (__v8df) __W,
5609						    (__mmask8) __U);
5610}
5611
5612extern __inline __m512d
5613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5614_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5615			  const int __imm)
5616{
5617  return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5618						    (__v4df) __B,
5619						    __imm,
5620						    (__v8df)
5621						    _mm512_setzero_pd (),
5622						    (__mmask8) __U);
5623}
5624#else
/* Macro form used when __OPTIMIZE__ is not defined.  X is expanded only
   once, so an argument with side effects is evaluated a single time, as
   in the inline-function form.  The mask is all ones, so the pass-through
   operand is supplied by _mm512_undefined_ps () (as the 64x4 insert
   macros do) instead of by a second expansion of X.  */
#define _mm512_insertf32x4(X, Y, C)                                     \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C),                                    \
    (__v16sf)(__m512)_mm512_undefined_ps (),                            \
    (__mmask16)(-1)))
5628
/* Macro form used when __OPTIMIZE__ is not defined.  X is expanded only
   once, so an argument with side effects is evaluated a single time, as
   in the inline-function form.  The mask is all ones, so the pass-through
   operand is supplied by _mm512_undefined_si512 () (as the 64x4 insert
   macros do) instead of by a second expansion of X.  */
#define _mm512_inserti32x4(X, Y, C)                                     \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C),                                   \
    (__v16si)(__m512i)_mm512_undefined_si512 (),                        \
    (__mmask16)(-1)))
5632
/* Macro form used when __OPTIMIZE__ is not defined: undefined
   pass-through vector, all-ones mask.  */
#define _mm512_insertf64x4(X, Y, C)                                     \
  ((__m512d)                                                            \
   __builtin_ia32_insertf64x4_mask ((__v8df) (__m512d) (X),             \
                                    (__v4df) (__m256d) (Y),             \
                                    (int) (C),                          \
                                    (__v8df) (__m512d)                  \
                                    _mm512_undefined_pd (),             \
                                    (__mmask8) -1))
5638
/* Macro form used when __OPTIMIZE__ is not defined: W is the
   pass-through vector, U the write mask.  */
#define _mm512_mask_insertf64x4(W, U, X, Y, C)                          \
  ((__m512d)                                                            \
   __builtin_ia32_insertf64x4_mask ((__v8df) (__m512d) (X),             \
                                    (__v4df) (__m256d) (Y),             \
                                    (int) (C),                          \
                                    (__v8df) (__m512d) (W),             \
                                    (__mmask8) (U)))
5644
/* Macro form used when __OPTIMIZE__ is not defined: zero pass-through
   vector, U is the write mask.  */
#define _mm512_maskz_insertf64x4(U, X, Y, C)                            \
  ((__m512d)                                                            \
   __builtin_ia32_insertf64x4_mask ((__v8df) (__m512d) (X),             \
                                    (__v4df) (__m256d) (Y),             \
                                    (int) (C),                          \
                                    (__v8df) (__m512d)                  \
                                    _mm512_setzero_pd (),               \
                                    (__mmask8) (U)))
5650
/* Macro form used when __OPTIMIZE__ is not defined: undefined
   pass-through vector, all-ones mask.  */
#define _mm512_inserti64x4(X, Y, C)                                     \
  ((__m512i)                                                            \
   __builtin_ia32_inserti64x4_mask ((__v8di) (__m512i) (X),             \
                                    (__v4di) (__m256i) (Y),             \
                                    (int) (C),                          \
                                    (__v8di) (__m512i)                  \
                                    _mm512_undefined_si512 (),          \
                                    (__mmask8) -1))
5656
/* Macro form used when __OPTIMIZE__ is not defined: W is the
   pass-through vector, U the write mask.  */
#define _mm512_mask_inserti64x4(W, U, X, Y, C)                          \
  ((__m512i)                                                            \
   __builtin_ia32_inserti64x4_mask ((__v8di) (__m512i) (X),             \
                                    (__v4di) (__m256i) (Y),             \
                                    (int) (C),                          \
                                    (__v8di) (__m512i) (W),             \
                                    (__mmask8) (U)))
5662
/* Macro form used when __OPTIMIZE__ is not defined: zero pass-through
   vector, U is the write mask.  */
#define _mm512_maskz_inserti64x4(U, X, Y, C)                            \
  ((__m512i)                                                            \
   __builtin_ia32_inserti64x4_mask ((__v8di) (__m512i) (X),             \
                                    (__v4di) (__m256i) (Y),             \
                                    (int) (C),                          \
                                    (__v8di) (__m512i)                  \
                                    _mm512_setzero_si512 (),            \
                                    (__mmask8) (U)))
5668#endif
5669
5670extern __inline __m512d
5671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5672_mm512_loadu_pd (void const *__P)
5673{
5674  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5675						   (__v8df)
5676						   _mm512_undefined_pd (),
5677						   (__mmask8) -1);
5678}
5679
5680extern __inline __m512d
5681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5682_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
5683{
5684  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5685						   (__v8df) __W,
5686						   (__mmask8) __U);
5687}
5688
5689extern __inline __m512d
5690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5691_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
5692{
5693  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5694						   (__v8df)
5695						   _mm512_setzero_pd (),
5696						   (__mmask8) __U);
5697}
5698
5699extern __inline void
5700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5701_mm512_storeu_pd (void *__P, __m512d __A)
5702{
5703  __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5704				   (__mmask8) -1);
5705}
5706
5707extern __inline void
5708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5709_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
5710{
5711  __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5712				   (__mmask8) __U);
5713}
5714
5715extern __inline __m512
5716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5717_mm512_loadu_ps (void const *__P)
5718{
5719  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5720						  (__v16sf)
5721						  _mm512_undefined_ps (),
5722						  (__mmask16) -1);
5723}
5724
5725extern __inline __m512
5726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5727_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
5728{
5729  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5730						  (__v16sf) __W,
5731						  (__mmask16) __U);
5732}
5733
5734extern __inline __m512
5735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5736_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
5737{
5738  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5739						  (__v16sf)
5740						  _mm512_setzero_ps (),
5741						  (__mmask16) __U);
5742}
5743
5744extern __inline void
5745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5746_mm512_storeu_ps (void *__P, __m512 __A)
5747{
5748  __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5749				   (__mmask16) -1);
5750}
5751
5752extern __inline void
5753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5754_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
5755{
5756  __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5757				   (__mmask16) __U);
5758}
5759
5760extern __inline __m512i
5761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5762_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5763{
5764  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5765						     (__v8di) __W,
5766						     (__mmask8) __U);
5767}
5768
5769extern __inline __m512i
5770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5771_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5772{
5773  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5774						     (__v8di)
5775						     _mm512_setzero_si512 (),
5776						     (__mmask8) __U);
5777}
5778
5779extern __inline void
5780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5781_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
5782{
5783  __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A,
5784				     (__mmask8) __U);
5785}
5786
5787extern __inline __m512i
5788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5789_mm512_loadu_si512 (void const *__P)
5790{
5791  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5792						     (__v16si)
5793						     _mm512_setzero_si512 (),
5794						     (__mmask16) -1);
5795}
5796
5797extern __inline __m512i
5798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5799_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5800{
5801  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5802						     (__v16si) __W,
5803						     (__mmask16) __U);
5804}
5805
5806extern __inline __m512i
5807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5808_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
5809{
5810  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5811						     (__v16si)
5812						     _mm512_setzero_si512 (),
5813						     (__mmask16) __U);
5814}
5815
5816extern __inline void
5817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5818_mm512_storeu_si512 (void *__P, __m512i __A)
5819{
5820  __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5821				     (__mmask16) -1);
5822}
5823
5824extern __inline void
5825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5826_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
5827{
5828  __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5829				     (__mmask16) __U);
5830}
5831
5832extern __inline __m512d
5833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5834_mm512_permutevar_pd (__m512d __A, __m512i __C)
5835{
5836  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5837							(__v8di) __C,
5838							(__v8df)
5839							_mm512_undefined_pd (),
5840							(__mmask8) -1);
5841}
5842
5843extern __inline __m512d
5844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5845_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5846{
5847  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5848							(__v8di) __C,
5849							(__v8df) __W,
5850							(__mmask8) __U);
5851}
5852
5853extern __inline __m512d
5854__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5855_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5856{
5857  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5858							(__v8di) __C,
5859							(__v8df)
5860							_mm512_setzero_pd (),
5861							(__mmask8) __U);
5862}
5863
5864extern __inline __m512
5865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5866_mm512_permutevar_ps (__m512 __A, __m512i __C)
5867{
5868  return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5869						       (__v16si) __C,
5870						       (__v16sf)
5871						       _mm512_undefined_ps (),
5872						       (__mmask16) -1);
5873}
5874
5875extern __inline __m512
5876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5877_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5878{
5879  return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5880						       (__v16si) __C,
5881						       (__v16sf) __W,
5882						       (__mmask16) __U);
5883}
5884
5885extern __inline __m512
5886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5887_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5888{
5889  return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5890						       (__v16si) __C,
5891						       (__v16sf)
5892						       _mm512_setzero_ps (),
5893						       (__mmask16) __U);
5894}
5895
5896extern __inline __m512i
5897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5898_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
5899{
5900  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5901						       /* idx */ ,
5902						       (__v8di) __A,
5903						       (__v8di) __B,
5904						       (__mmask8) -1);
5905}
5906
5907extern __inline __m512i
5908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5909_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
5910				__m512i __B)
5911{
5912  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5913						       /* idx */ ,
5914						       (__v8di) __A,
5915						       (__v8di) __B,
5916						       (__mmask8) __U);
5917}
5918
5919extern __inline __m512i
5920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5921_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5922				 __mmask8 __U, __m512i __B)
5923{
5924  return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5925						       (__v8di) __I
5926						       /* idx */ ,
5927						       (__v8di) __B,
5928						       (__mmask8) __U);
5929}
5930
5931extern __inline __m512i
5932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5933_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5934				 __m512i __I, __m512i __B)
5935{
5936  return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5937							/* idx */ ,
5938							(__v8di) __A,
5939							(__v8di) __B,
5940							(__mmask8) __U);
5941}
5942
5943extern __inline __m512i
5944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5945_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
5946{
5947  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5948						       /* idx */ ,
5949						       (__v16si) __A,
5950						       (__v16si) __B,
5951						       (__mmask16) -1);
5952}
5953
5954extern __inline __m512i
5955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5956_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
5957				__m512i __I, __m512i __B)
5958{
5959  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5960						       /* idx */ ,
5961						       (__v16si) __A,
5962						       (__v16si) __B,
5963						       (__mmask16) __U);
5964}
5965
5966extern __inline __m512i
5967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5968_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5969				 __mmask16 __U, __m512i __B)
5970{
5971  return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5972						       (__v16si) __I
5973						       /* idx */ ,
5974						       (__v16si) __B,
5975						       (__mmask16) __U);
5976}
5977
5978extern __inline __m512i
5979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5980_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
5981				 __m512i __I, __m512i __B)
5982{
5983  return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
5984							/* idx */ ,
5985							(__v16si) __A,
5986							(__v16si) __B,
5987							(__mmask16) __U);
5988}
5989
5990extern __inline __m512d
5991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5992_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
5993{
5994  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
5995							/* idx */ ,
5996							(__v8df) __A,
5997							(__v8df) __B,
5998							(__mmask8) -1);
5999}
6000
6001extern __inline __m512d
6002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6003_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6004			     __m512d __B)
6005{
6006  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6007							/* idx */ ,
6008							(__v8df) __A,
6009							(__v8df) __B,
6010							(__mmask8) __U);
6011}
6012
6013extern __inline __m512d
6014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6015_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6016			      __m512d __B)
6017{
6018  return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6019							(__v8di) __I
6020							/* idx */ ,
6021							(__v8df) __B,
6022							(__mmask8) __U);
6023}
6024
6025extern __inline __m512d
6026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6027_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6028			      __m512d __B)
6029{
6030  return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6031							 /* idx */ ,
6032							 (__v8df) __A,
6033							 (__v8df) __B,
6034							 (__mmask8) __U);
6035}
6036
6037extern __inline __m512
6038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6039_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6040{
6041  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6042						       /* idx */ ,
6043						       (__v16sf) __A,
6044						       (__v16sf) __B,
6045						       (__mmask16) -1);
6046}
6047
6048extern __inline __m512
6049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6050_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6051{
6052  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6053						       /* idx */ ,
6054						       (__v16sf) __A,
6055						       (__v16sf) __B,
6056						       (__mmask16) __U);
6057}
6058
6059extern __inline __m512
6060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6061_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6062			      __m512 __B)
6063{
6064  return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6065						       (__v16si) __I
6066						       /* idx */ ,
6067						       (__v16sf) __B,
6068						       (__mmask16) __U);
6069}
6070
6071extern __inline __m512
6072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6073_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6074			      __m512 __B)
6075{
6076  return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6077							/* idx */ ,
6078							(__v16sf) __A,
6079							(__v16sf) __B,
6080							(__mmask16) __U);
6081}
6082
6083#ifdef __OPTIMIZE__
6084extern __inline __m512d
6085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6086_mm512_permute_pd (__m512d __X, const int __C)
6087{
6088  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6089						     (__v8df)
6090						     _mm512_undefined_pd (),
6091						     (__mmask8) -1);
6092}
6093
6094extern __inline __m512d
6095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6096_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6097{
6098  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6099						     (__v8df) __W,
6100						     (__mmask8) __U);
6101}
6102
6103extern __inline __m512d
6104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6105_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6106{
6107  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6108						     (__v8df)
6109						     _mm512_setzero_pd (),
6110						     (__mmask8) __U);
6111}
6112
6113extern __inline __m512
6114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6115_mm512_permute_ps (__m512 __X, const int __C)
6116{
6117  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6118						    (__v16sf)
6119						    _mm512_undefined_ps (),
6120						    (__mmask16) -1);
6121}
6122
6123extern __inline __m512
6124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6125_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6126{
6127  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6128						    (__v16sf) __W,
6129						    (__mmask16) __U);
6130}
6131
6132extern __inline __m512
6133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6134_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6135{
6136  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6137						    (__v16sf)
6138						    _mm512_setzero_ps (),
6139						    (__mmask16) __U);
6140}
6141#else
/* Macro form used when __OPTIMIZE__ is not defined: undefined
   pass-through vector, all-ones mask.  */
#define _mm512_permute_pd(X, C)                                         \
  ((__m512d)                                                            \
   __builtin_ia32_vpermilpd512_mask ((__v8df) (__m512d) (X),            \
                                     (int) (C),                         \
                                     (__v8df) (__m512d)                 \
                                     _mm512_undefined_pd (),            \
                                     (__mmask8) (-1)))
6146
/* Macro form used when __OPTIMIZE__ is not defined: W is the
   pass-through vector, U the write mask.  */
#define _mm512_mask_permute_pd(W, U, X, C)                              \
  ((__m512d)                                                            \
   __builtin_ia32_vpermilpd512_mask ((__v8df) (__m512d) (X),            \
                                     (int) (C),                         \
                                     (__v8df) (__m512d) (W),            \
                                     (__mmask8) (U)))
6151
/* Macro form used when __OPTIMIZE__ is not defined: zero pass-through
   vector, U is the write mask.  */
#define _mm512_maskz_permute_pd(U, X, C)                                \
  ((__m512d)                                                            \
   __builtin_ia32_vpermilpd512_mask ((__v8df) (__m512d) (X),            \
                                     (int) (C),                         \
                                     (__v8df) (__m512d)                 \
                                     _mm512_setzero_pd (),              \
                                     (__mmask8) (U)))
6156
/* Macro form used when __OPTIMIZE__ is not defined: undefined
   pass-through vector, all-ones mask.  */
#define _mm512_permute_ps(X, C)                                         \
  ((__m512)                                                             \
   __builtin_ia32_vpermilps512_mask ((__v16sf) (__m512) (X),            \
                                     (int) (C),                         \
                                     (__v16sf) (__m512)                 \
                                     _mm512_undefined_ps (),            \
                                     (__mmask16) (-1)))
6161
/* Macro form used when __OPTIMIZE__ is not defined: W is the
   pass-through vector, U the write mask.  */
#define _mm512_mask_permute_ps(W, U, X, C)                              \
  ((__m512)                                                             \
   __builtin_ia32_vpermilps512_mask ((__v16sf) (__m512) (X),            \
                                     (int) (C),                         \
                                     (__v16sf) (__m512) (W),            \
                                     (__mmask16) (U)))
6166
/* Macro form used when __OPTIMIZE__ is not defined: zero pass-through
   vector, U is the write mask.  */
#define _mm512_maskz_permute_ps(U, X, C)                                \
  ((__m512)                                                             \
   __builtin_ia32_vpermilps512_mask ((__v16sf) (__m512) (X),            \
                                     (int) (C),                         \
                                     (__v16sf) (__m512)                 \
                                     _mm512_setzero_ps (),              \
                                     (__mmask16) (U)))
6171#endif
6172
6173#ifdef __OPTIMIZE__
6174extern __inline __m512i
6175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6176_mm512_permutex_epi64 (__m512i __X, const int __I)
6177{
6178  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6179						  (__v8di)
6180						  _mm512_undefined_si512 (),
6181						  (__mmask8) (-1));
6182}
6183
6184extern __inline __m512i
6185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6186_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6187			    __m512i __X, const int __I)
6188{
6189  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6190						  (__v8di) __W,
6191						  (__mmask8) __M);
6192}
6193
6194extern __inline __m512i
6195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6196_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6197{
6198  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6199						  (__v8di)
6200						  _mm512_setzero_si512 (),
6201						  (__mmask8) __M);
6202}
6203
6204extern __inline __m512d
6205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6206_mm512_permutex_pd (__m512d __X, const int __M)
6207{
6208  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6209						  (__v8df)
6210						  _mm512_undefined_pd (),
6211						  (__mmask8) -1);
6212}
6213
6214extern __inline __m512d
6215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6216_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6217{
6218  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6219						  (__v8df) __W,
6220						  (__mmask8) __U);
6221}
6222
6223extern __inline __m512d
6224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6225_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6226{
6227  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6228						  (__v8df)
6229						  _mm512_setzero_pd (),
6230						  (__mmask8) __U);
6231}
6232#else
/* Macro form used when __OPTIMIZE__ is not defined: M is the immediate;
   undefined pass-through vector, all-ones mask.  */
#define _mm512_permutex_pd(X, M)                                        \
  ((__m512d)                                                            \
   __builtin_ia32_permdf512_mask ((__v8df) (__m512d) (X),               \
                                  (int) (M),                            \
                                  (__v8df) (__m512d)                    \
                                  _mm512_undefined_pd (),               \
                                  (__mmask8) -1))
6237
/* Macro form used when __OPTIMIZE__ is not defined: M is the immediate,
   W the pass-through vector, U the write mask.  */
#define _mm512_mask_permutex_pd(W, U, X, M)                             \
  ((__m512d)                                                            \
   __builtin_ia32_permdf512_mask ((__v8df) (__m512d) (X),               \
                                  (int) (M),                            \
                                  (__v8df) (__m512d) (W),               \
                                  (__mmask8) (U)))
6241
/* Macro form used when __OPTIMIZE__ is not defined: M is the immediate;
   zero pass-through vector, U is the write mask.  */
#define _mm512_maskz_permutex_pd(U, X, M)                               \
  ((__m512d)                                                            \
   __builtin_ia32_permdf512_mask ((__v8df) (__m512d) (X),               \
                                  (int) (M),                            \
                                  (__v8df) (__m512d)                    \
                                  _mm512_setzero_pd (),                 \
                                  (__mmask8) (U)))
6246
/* Macro form used when __OPTIMIZE__ is not defined: I is the immediate;
   undefined pass-through vector, all-ones mask.  */
#define _mm512_permutex_epi64(X, I)                                     \
  ((__m512i)                                                            \
   __builtin_ia32_permdi512_mask ((__v8di) (__m512i) (X),               \
                                  (int) (I),                            \
                                  (__v8di) (__m512i)                    \
                                  (_mm512_undefined_si512 ()),          \
                                  (__mmask8) (-1)))
6253
/* Macro form used when __OPTIMIZE__ is not defined: I is the immediate;
   zero pass-through vector, M is the write mask.  */
#define _mm512_maskz_permutex_epi64(M, X, I)                            \
  ((__m512i)                                                            \
   __builtin_ia32_permdi512_mask ((__v8di) (__m512i) (X),               \
                                  (int) (I),                            \
                                  (__v8di) (__m512i)                    \
                                  (_mm512_setzero_si512 ()),            \
                                  (__mmask8) (M)))
6260
/* Macro form used when __OPTIMIZE__ is not defined: I is the immediate,
   W the pass-through vector, M the write mask.  */
#define _mm512_mask_permutex_epi64(W, M, X, I)                          \
  ((__m512i)                                                            \
   __builtin_ia32_permdi512_mask ((__v8di) (__m512i) (X),               \
                                  (int) (I),                            \
                                  (__v8di) (__m512i) (W),               \
                                  (__mmask8) (M)))
6266#endif
6267
6268extern __inline __m512i
6269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6270_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6271{
6272  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6273						     (__v8di) __X,
6274						     (__v8di)
6275						     _mm512_setzero_si512 (),
6276						     __M);
6277}
6278
6279extern __inline __m512i
6280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6281_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6282{
6283  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6284						     (__v8di) __X,
6285						     (__v8di)
6286						     _mm512_undefined_si512 (),
6287						     (__mmask8) -1);
6288}
6289
6290extern __inline __m512i
6291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6292_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6293			       __m512i __Y)
6294{
6295  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6296						     (__v8di) __X,
6297						     (__v8di) __W,
6298						     __M);
6299}
6300
6301extern __inline __m512i
6302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6303_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6304{
6305  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6306						     (__v16si) __X,
6307						     (__v16si)
6308						     _mm512_setzero_si512 (),
6309						     __M);
6310}
6311
6312extern __inline __m512i
6313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6314_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6315{
6316  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6317						     (__v16si) __X,
6318						     (__v16si)
6319						     _mm512_undefined_si512 (),
6320						     (__mmask16) -1);
6321}
6322
6323extern __inline __m512i
6324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6325_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6326			       __m512i __Y)
6327{
6328  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6329						     (__v16si) __X,
6330						     (__v16si) __W,
6331						     __M);
6332}
6333
6334extern __inline __m512d
6335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6336_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6337{
6338  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6339						     (__v8di) __X,
6340						     (__v8df)
6341						     _mm512_undefined_pd (),
6342						     (__mmask8) -1);
6343}
6344
6345extern __inline __m512d
6346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6347_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6348{
6349  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6350						     (__v8di) __X,
6351						     (__v8df) __W,
6352						     (__mmask8) __U);
6353}
6354
6355extern __inline __m512d
6356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6357_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6358{
6359  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6360						     (__v8di) __X,
6361						     (__v8df)
6362						     _mm512_setzero_pd (),
6363						     (__mmask8) __U);
6364}
6365
6366extern __inline __m512
6367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6368_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6369{
6370  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6371						    (__v16si) __X,
6372						    (__v16sf)
6373						    _mm512_undefined_ps (),
6374						    (__mmask16) -1);
6375}
6376
6377extern __inline __m512
6378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6379_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6380{
6381  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6382						    (__v16si) __X,
6383						    (__v16sf) __W,
6384						    (__mmask16) __U);
6385}
6386
6387extern __inline __m512
6388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6389_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6390{
6391  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6392						    (__v16si) __X,
6393						    (__v16sf)
6394						    _mm512_setzero_ps (),
6395						    (__mmask16) __U);
6396}
6397
6398#ifdef __OPTIMIZE__
6399extern __inline __m512
6400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6401_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6402{
6403  return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6404						 (__v16sf) __V, __imm,
6405						 (__v16sf)
6406						 _mm512_undefined_ps (),
6407						 (__mmask16) -1);
6408}
6409
6410extern __inline __m512
6411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6412_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6413			__m512 __V, const int __imm)
6414{
6415  return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6416						 (__v16sf) __V, __imm,
6417						 (__v16sf) __W,
6418						 (__mmask16) __U);
6419}
6420
6421extern __inline __m512
6422__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6423_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6424{
6425  return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6426						 (__v16sf) __V, __imm,
6427						 (__v16sf)
6428						 _mm512_setzero_ps (),
6429						 (__mmask16) __U);
6430}
6431
6432extern __inline __m512d
6433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6434_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6435{
6436  return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6437						  (__v8df) __V, __imm,
6438						  (__v8df)
6439						  _mm512_undefined_pd (),
6440						  (__mmask8) -1);
6441}
6442
6443extern __inline __m512d
6444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6445_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6446			__m512d __V, const int __imm)
6447{
6448  return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6449						  (__v8df) __V, __imm,
6450						  (__v8df) __W,
6451						  (__mmask8) __U);
6452}
6453
6454extern __inline __m512d
6455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6456_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6457			 const int __imm)
6458{
6459  return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6460						  (__v8df) __V, __imm,
6461						  (__v8df)
6462						  _mm512_setzero_pd (),
6463						  (__mmask8) __U);
6464}
6465
6466extern __inline __m512d
6467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6468_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6469			  const int __imm, const int __R)
6470{
6471  return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6472						      (__v8df) __B,
6473						      (__v8di) __C,
6474						      __imm,
6475						      (__mmask8) -1, __R);
6476}
6477
6478extern __inline __m512d
6479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6480_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6481			       __m512i __C, const int __imm, const int __R)
6482{
6483  return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6484						      (__v8df) __B,
6485						      (__v8di) __C,
6486						      __imm,
6487						      (__mmask8) __U, __R);
6488}
6489
6490extern __inline __m512d
6491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6492_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6493				__m512i __C, const int __imm, const int __R)
6494{
6495  return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6496						       (__v8df) __B,
6497						       (__v8di) __C,
6498						       __imm,
6499						       (__mmask8) __U, __R);
6500}
6501
6502extern __inline __m512
6503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6504_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6505			  const int __imm, const int __R)
6506{
6507  return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6508						     (__v16sf) __B,
6509						     (__v16si) __C,
6510						     __imm,
6511						     (__mmask16) -1, __R);
6512}
6513
6514extern __inline __m512
6515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6516_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6517			       __m512i __C, const int __imm, const int __R)
6518{
6519  return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6520						     (__v16sf) __B,
6521						     (__v16si) __C,
6522						     __imm,
6523						     (__mmask16) __U, __R);
6524}
6525
6526extern __inline __m512
6527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6528_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6529				__m512i __C, const int __imm, const int __R)
6530{
6531  return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6532						      (__v16sf) __B,
6533						      (__v16si) __C,
6534						      __imm,
6535						      (__mmask16) __U, __R);
6536}
6537
6538extern __inline __m128d
6539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6540_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6541		       const int __imm, const int __R)
6542{
6543  return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6544						   (__v2df) __B,
6545						   (__v2di) __C, __imm,
6546						   (__mmask8) -1, __R);
6547}
6548
6549extern __inline __m128d
6550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6551_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6552			    __m128i __C, const int __imm, const int __R)
6553{
6554  return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6555						   (__v2df) __B,
6556						   (__v2di) __C, __imm,
6557						   (__mmask8) __U, __R);
6558}
6559
6560extern __inline __m128d
6561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6562_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6563			     __m128i __C, const int __imm, const int __R)
6564{
6565  return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6566						    (__v2df) __B,
6567						    (__v2di) __C,
6568						    __imm,
6569						    (__mmask8) __U, __R);
6570}
6571
6572extern __inline __m128
6573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6574_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6575		       const int __imm, const int __R)
6576{
6577  return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6578						  (__v4sf) __B,
6579						  (__v4si) __C, __imm,
6580						  (__mmask8) -1, __R);
6581}
6582
6583extern __inline __m128
6584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6585_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6586			    __m128i __C, const int __imm, const int __R)
6587{
6588  return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6589						  (__v4sf) __B,
6590						  (__v4si) __C, __imm,
6591						  (__mmask8) __U, __R);
6592}
6593
6594extern __inline __m128
6595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6596_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6597			     __m128i __C, const int __imm, const int __R)
6598{
6599  return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6600						   (__v4sf) __B,
6601						   (__v4si) __C, __imm,
6602						   (__mmask8) __U, __R);
6603}
6604
6605#else
/* Macro forms of the _mm512_*shuffle_pd intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to
   __builtin_ia32_shufpd512_mask on X and Y with C converted to int; the
   variants differ only in the fourth operand and in the mask.  */
#define _mm512_shuffle_pd(X, Y, C)					\
  ((__m512d)								\
   __builtin_ia32_shufpd512_mask ((__v8df) (__m512d) (X),		\
				  (__v8df) (__m512d) (Y),		\
				  (int) (C),				\
				  (__v8df) (__m512d)			\
				  _mm512_undefined_pd (),		\
				  (__mmask8) -1))

#define _mm512_mask_shuffle_pd(W, U, X, Y, C)				\
  ((__m512d)								\
   __builtin_ia32_shufpd512_mask ((__v8df) (__m512d) (X),		\
				  (__v8df) (__m512d) (Y),		\
				  (int) (C),				\
				  (__v8df) (__m512d) (W),		\
				  (__mmask8) (U)))

#define _mm512_maskz_shuffle_pd(U, X, Y, C)				\
  ((__m512d)								\
   __builtin_ia32_shufpd512_mask ((__v8df) (__m512d) (X),		\
				  (__v8df) (__m512d) (Y),		\
				  (int) (C),				\
				  (__v8df) (__m512d)			\
				  _mm512_setzero_pd (),			\
				  (__mmask8) (U)))
6623
/* Macro forms of the _mm512_*shuffle_ps intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to
   __builtin_ia32_shufps512_mask on X and Y with C converted to int; the
   variants differ only in the fourth operand and in the mask.  */
#define _mm512_shuffle_ps(X, Y, C)					\
  ((__m512)								\
   __builtin_ia32_shufps512_mask ((__v16sf) (__m512) (X),		\
				  (__v16sf) (__m512) (Y),		\
				  (int) (C),				\
				  (__v16sf) (__m512)			\
				  _mm512_undefined_ps (),		\
				  (__mmask16) -1))

#define _mm512_mask_shuffle_ps(W, U, X, Y, C)				\
  ((__m512)								\
   __builtin_ia32_shufps512_mask ((__v16sf) (__m512) (X),		\
				  (__v16sf) (__m512) (Y),		\
				  (int) (C),				\
				  (__v16sf) (__m512) (W),		\
				  (__mmask16) (U)))

#define _mm512_maskz_shuffle_ps(U, X, Y, C)				\
  ((__m512)								\
   __builtin_ia32_shufps512_mask ((__v16sf) (__m512) (X),		\
				  (__v16sf) (__m512) (Y),		\
				  (int) (C),				\
				  (__v16sf) (__m512)			\
				  _mm512_setzero_ps (),			\
				  (__mmask16) (U)))
6641
/* Macro forms of the _mm512_*fixupimm_round_pd intrinsics, used when
   __OPTIMIZE__ is not defined.  The unmasked and mask variants expand
   to __builtin_ia32_fixupimmpd512_mask, the maskz variant to
   __builtin_ia32_fixupimmpd512_maskz; C is converted to int and R is
   passed through as the last operand.  */
#define _mm512_fixupimm_round_pd(X, Y, Z, C, R)				\
  ((__m512d)								\
   __builtin_ia32_fixupimmpd512_mask ((__v8df) (__m512d) (X),		\
				      (__v8df) (__m512d) (Y),		\
				      (__v8di) (__m512i) (Z),		\
				      (int) (C),			\
				      (__mmask8) (-1), (R)))

#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R)			\
  ((__m512d)								\
   __builtin_ia32_fixupimmpd512_mask ((__v8df) (__m512d) (X),		\
				      (__v8df) (__m512d) (Y),		\
				      (__v8di) (__m512i) (Z),		\
				      (int) (C),			\
				      (__mmask8) (U), (R)))

#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R)		\
  ((__m512d)								\
   __builtin_ia32_fixupimmpd512_maskz ((__v8df) (__m512d) (X),		\
				       (__v8df) (__m512d) (Y),		\
				       (__v8di) (__m512i) (Z),		\
				       (int) (C),			\
				       (__mmask8) (U), (R)))
6656
/* Macro forms of the _mm512_*fixupimm_round_ps intrinsics, used when
   __OPTIMIZE__ is not defined.  The unmasked and mask variants expand
   to __builtin_ia32_fixupimmps512_mask, the maskz variant to
   __builtin_ia32_fixupimmps512_maskz; C is converted to int and R is
   passed through as the last operand.  */
#define _mm512_fixupimm_round_ps(X, Y, Z, C, R)				\
  ((__m512)								\
   __builtin_ia32_fixupimmps512_mask ((__v16sf) (__m512) (X),		\
				      (__v16sf) (__m512) (Y),		\
				      (__v16si) (__m512i) (Z),		\
				      (int) (C),			\
				      (__mmask16) (-1), (R)))

#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R)			\
  ((__m512)								\
   __builtin_ia32_fixupimmps512_mask ((__v16sf) (__m512) (X),		\
				      (__v16sf) (__m512) (Y),		\
				      (__v16si) (__m512i) (Z),		\
				      (int) (C),			\
				      (__mmask16) (U), (R)))

#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R)		\
  ((__m512)								\
   __builtin_ia32_fixupimmps512_maskz ((__v16sf) (__m512) (X),		\
				       (__v16sf) (__m512) (Y),		\
				       (__v16si) (__m512i) (Z),		\
				       (int) (C),			\
				       (__mmask16) (U), (R)))
6671
/* Macro forms of the _mm_*fixupimm_round_sd intrinsics, used when
   __OPTIMIZE__ is not defined.  The unmasked and mask variants expand
   to __builtin_ia32_fixupimmsd_mask, the maskz variant to
   __builtin_ia32_fixupimmsd_maskz; C is converted to int and R is
   passed through as the last operand.  */
#define _mm_fixupimm_round_sd(X, Y, Z, C, R)				\
  ((__m128d)								\
   __builtin_ia32_fixupimmsd_mask ((__v2df) (__m128d) (X),		\
				   (__v2df) (__m128d) (Y),		\
				   (__v2di) (__m128i) (Z),		\
				   (int) (C),				\
				   (__mmask8) (-1), (R)))

#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R)			\
  ((__m128d)								\
   __builtin_ia32_fixupimmsd_mask ((__v2df) (__m128d) (X),		\
				   (__v2df) (__m128d) (Y),		\
				   (__v2di) (__m128i) (Z),		\
				   (int) (C),				\
				   (__mmask8) (U), (R)))

#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R)			\
  ((__m128d)								\
   __builtin_ia32_fixupimmsd_maskz ((__v2df) (__m128d) (X),		\
				    (__v2df) (__m128d) (Y),		\
				    (__v2di) (__m128i) (Z),		\
				    (int) (C),				\
				    (__mmask8) (U), (R)))
6686
/* Macro forms of the _mm_*fixupimm_round_ss intrinsics, used when
   __OPTIMIZE__ is not defined.  The unmasked and mask variants expand
   to __builtin_ia32_fixupimmss_mask, the maskz variant to
   __builtin_ia32_fixupimmss_maskz; C is converted to int and R is
   passed through as the last operand.  */
#define _mm_fixupimm_round_ss(X, Y, Z, C, R)				\
  ((__m128)								\
   __builtin_ia32_fixupimmss_mask ((__v4sf) (__m128) (X),		\
				   (__v4sf) (__m128) (Y),		\
				   (__v4si) (__m128i) (Z),		\
				   (int) (C),				\
				   (__mmask8) (-1), (R)))

#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R)			\
  ((__m128)								\
   __builtin_ia32_fixupimmss_mask ((__v4sf) (__m128) (X),		\
				   (__v4sf) (__m128) (Y),		\
				   (__v4si) (__m128i) (Z),		\
				   (int) (C),				\
				   (__mmask8) (U), (R)))

#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R)			\
  ((__m128)								\
   __builtin_ia32_fixupimmss_maskz ((__v4sf) (__m128) (X),		\
				    (__v4sf) (__m128) (Y),		\
				    (__v4si) (__m128i) (Z),		\
				    (int) (C),				\
				    (__mmask8) (U), (R)))
6701#endif
6702
6703extern __inline __m512
6704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6705_mm512_movehdup_ps (__m512 __A)
6706{
6707  return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6708						   (__v16sf)
6709						   _mm512_undefined_ps (),
6710						   (__mmask16) -1);
6711}
6712
6713extern __inline __m512
6714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6715_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6716{
6717  return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6718						   (__v16sf) __W,
6719						   (__mmask16) __U);
6720}
6721
6722extern __inline __m512
6723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6724_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
6725{
6726  return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6727						   (__v16sf)
6728						   _mm512_setzero_ps (),
6729						   (__mmask16) __U);
6730}
6731
6732extern __inline __m512
6733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6734_mm512_moveldup_ps (__m512 __A)
6735{
6736  return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6737						   (__v16sf)
6738						   _mm512_undefined_ps (),
6739						   (__mmask16) -1);
6740}
6741
6742extern __inline __m512
6743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6744_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6745{
6746  return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6747						   (__v16sf) __W,
6748						   (__mmask16) __U);
6749}
6750
6751extern __inline __m512
6752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6753_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
6754{
6755  return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6756						   (__v16sf)
6757						   _mm512_setzero_ps (),
6758						   (__mmask16) __U);
6759}
6760
6761extern __inline __m512i
6762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6763_mm512_or_si512 (__m512i __A, __m512i __B)
6764{
6765  return (__m512i) ((__v16su) __A | (__v16su) __B);
6766}
6767
6768extern __inline __m512i
6769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6770_mm512_or_epi32 (__m512i __A, __m512i __B)
6771{
6772  return (__m512i) ((__v16su) __A | (__v16su) __B);
6773}
6774
6775extern __inline __m512i
6776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6777_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6778{
6779  return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6780						(__v16si) __B,
6781						(__v16si) __W,
6782						(__mmask16) __U);
6783}
6784
6785extern __inline __m512i
6786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6787_mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6788{
6789  return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6790						(__v16si) __B,
6791						(__v16si)
6792						_mm512_setzero_si512 (),
6793						(__mmask16) __U);
6794}
6795
6796extern __inline __m512i
6797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6798_mm512_or_epi64 (__m512i __A, __m512i __B)
6799{
6800  return (__m512i) ((__v8du) __A | (__v8du) __B);
6801}
6802
6803extern __inline __m512i
6804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6805_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6806{
6807  return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6808						(__v8di) __B,
6809						(__v8di) __W,
6810						(__mmask8) __U);
6811}
6812
6813extern __inline __m512i
6814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6815_mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6816{
6817  return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6818						(__v8di) __B,
6819						(__v8di)
6820						_mm512_setzero_si512 (),
6821						(__mmask8) __U);
6822}
6823
6824extern __inline __m512i
6825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6826_mm512_xor_si512 (__m512i __A, __m512i __B)
6827{
6828  return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6829}
6830
6831extern __inline __m512i
6832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6833_mm512_xor_epi32 (__m512i __A, __m512i __B)
6834{
6835  return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6836}
6837
6838extern __inline __m512i
6839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6840_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6841{
6842  return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6843						 (__v16si) __B,
6844						 (__v16si) __W,
6845						 (__mmask16) __U);
6846}
6847
6848extern __inline __m512i
6849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6850_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6851{
6852  return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6853						 (__v16si) __B,
6854						 (__v16si)
6855						 _mm512_setzero_si512 (),
6856						 (__mmask16) __U);
6857}
6858
6859extern __inline __m512i
6860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6861_mm512_xor_epi64 (__m512i __A, __m512i __B)
6862{
6863  return (__m512i) ((__v8du) __A ^ (__v8du) __B);
6864}
6865
6866extern __inline __m512i
6867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6868_mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6869{
6870  return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6871						 (__v8di) __B,
6872						 (__v8di) __W,
6873						 (__mmask8) __U);
6874}
6875
6876extern __inline __m512i
6877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6878_mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
6879{
6880  return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6881						 (__v8di) __B,
6882						 (__v8di)
6883						 _mm512_setzero_si512 (),
6884						 (__mmask8) __U);
6885}
6886
6887#ifdef __OPTIMIZE__
6888extern __inline __m512i
6889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6890_mm512_rol_epi32 (__m512i __A, const int __B)
6891{
6892  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6893						 (__v16si)
6894						 _mm512_undefined_si512 (),
6895						 (__mmask16) -1);
6896}
6897
6898extern __inline __m512i
6899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6901{
6902  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6903						 (__v16si) __W,
6904						 (__mmask16) __U);
6905}
6906
6907extern __inline __m512i
6908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6909_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
6910{
6911  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6912						 (__v16si)
6913						 _mm512_setzero_si512 (),
6914						 (__mmask16) __U);
6915}
6916
6917extern __inline __m512i
6918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6919_mm512_ror_epi32 (__m512i __A, int __B)
6920{
6921  return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6922						 (__v16si)
6923						 _mm512_undefined_si512 (),
6924						 (__mmask16) -1);
6925}
6926
6927extern __inline __m512i
6928__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6929_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
6930{
6931  return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6932						 (__v16si) __W,
6933						 (__mmask16) __U);
6934}
6935
6936extern __inline __m512i
6937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6938_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
6939{
6940  return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6941						 (__v16si)
6942						 _mm512_setzero_si512 (),
6943						 (__mmask16) __U);
6944}
6945
6946extern __inline __m512i
6947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6948_mm512_rol_epi64 (__m512i __A, const int __B)
6949{
6950  return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6951						 (__v8di)
6952						 _mm512_undefined_si512 (),
6953						 (__mmask8) -1);
6954}
6955
6956extern __inline __m512i
6957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6958_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
6959{
6960  return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6961						 (__v8di) __W,
6962						 (__mmask8) __U);
6963}
6964
6965extern __inline __m512i
6966__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6967_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
6968{
6969  return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6970						 (__v8di)
6971						 _mm512_setzero_si512 (),
6972						 (__mmask8) __U);
6973}
6974
6975extern __inline __m512i
6976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6977_mm512_ror_epi64 (__m512i __A, int __B)
6978{
6979  return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6980						 (__v8di)
6981						 _mm512_undefined_si512 (),
6982						 (__mmask8) -1);
6983}
6984
6985extern __inline __m512i
6986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6987_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
6988{
6989  return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6990						 (__v8di) __W,
6991						 (__mmask8) __U);
6992}
6993
6994extern __inline __m512i
6995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6996_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
6997{
6998  return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6999						 (__v8di)
7000						 _mm512_setzero_si512 (),
7001						 (__mmask8) __U);
7002}
7003
7004#else
/* Macro forms of the _mm512_*rol_epi32 intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to
   __builtin_ia32_prold512_mask on A with B converted to int; the
   variants differ only in the third operand and in the mask.  */
#define _mm512_rol_epi32(A, B)						\
  ((__m512i)								\
   __builtin_ia32_prold512_mask ((__v16si) (__m512i) (A),		\
				 (int) (B),				\
				 (__v16si) _mm512_undefined_si512 (),	\
				 (__mmask16) (-1)))
#define _mm512_mask_rol_epi32(W, U, A, B)				\
  ((__m512i)								\
   __builtin_ia32_prold512_mask ((__v16si) (__m512i) (A),		\
				 (int) (B),				\
				 (__v16si) (__m512i) (W),		\
				 (__mmask16) (U)))
#define _mm512_maskz_rol_epi32(U, A, B)					\
  ((__m512i)								\
   __builtin_ia32_prold512_mask ((__v16si) (__m512i) (A),		\
				 (int) (B),				\
				 (__v16si) _mm512_setzero_si512 (),	\
				 (__mmask16) (U)))
/* Macro forms of the _mm512_*ror_epi32 intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to
   __builtin_ia32_prord512_mask on A with B converted to int; the
   variants differ only in the third operand and in the mask.  */
#define _mm512_ror_epi32(A, B)						\
  ((__m512i)								\
   __builtin_ia32_prord512_mask ((__v16si) (__m512i) (A),		\
				 (int) (B),				\
				 (__v16si) _mm512_undefined_si512 (),	\
				 (__mmask16) (-1)))
#define _mm512_mask_ror_epi32(W, U, A, B)				\
  ((__m512i)								\
   __builtin_ia32_prord512_mask ((__v16si) (__m512i) (A),		\
				 (int) (B),				\
				 (__v16si) (__m512i) (W),		\
				 (__mmask16) (U)))
#define _mm512_maskz_ror_epi32(U, A, B)					\
  ((__m512i)								\
   __builtin_ia32_prord512_mask ((__v16si) (__m512i) (A),		\
				 (int) (B),				\
				 (__v16si) _mm512_setzero_si512 (),	\
				 (__mmask16) (U)))
/* Macro forms of the _mm512_*rol_epi64 intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to
   __builtin_ia32_prolq512_mask on A with B converted to int; the
   variants differ only in the third operand and in the mask.  */
#define _mm512_rol_epi64(A, B)						\
  ((__m512i)								\
   __builtin_ia32_prolq512_mask ((__v8di) (__m512i) (A),		\
				 (int) (B),				\
				 (__v8di) _mm512_undefined_si512 (),	\
				 (__mmask8) (-1)))
#define _mm512_mask_rol_epi64(W, U, A, B)				\
  ((__m512i)								\
   __builtin_ia32_prolq512_mask ((__v8di) (__m512i) (A),		\
				 (int) (B),				\
				 (__v8di) (__m512i) (W),		\
				 (__mmask8) (U)))
#define _mm512_maskz_rol_epi64(U, A, B)					\
  ((__m512i)								\
   __builtin_ia32_prolq512_mask ((__v8di) (__m512i) (A),		\
				 (int) (B),				\
				 (__v8di) _mm512_setzero_si512 (),	\
				 (__mmask8) (U)))
7050
/* Macro forms of the _mm512_*ror_epi64 intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to
   __builtin_ia32_prorq512_mask on A with B converted to int; the
   variants differ only in the third operand and in the mask.  */
#define _mm512_ror_epi64(A, B)						\
  ((__m512i)								\
   __builtin_ia32_prorq512_mask ((__v8di) (__m512i) (A),		\
				 (int) (B),				\
				 (__v8di) _mm512_undefined_si512 (),	\
				 (__mmask8) (-1)))
#define _mm512_mask_ror_epi64(W, U, A, B)				\
  ((__m512i)								\
   __builtin_ia32_prorq512_mask ((__v8di) (__m512i) (A),		\
				 (int) (B),				\
				 (__v8di) (__m512i) (W),		\
				 (__mmask8) (U)))
#define _mm512_maskz_ror_epi64(U, A, B)					\
  ((__m512i)								\
   __builtin_ia32_prorq512_mask ((__v8di) (__m512i) (A),		\
				 (int) (B),				\
				 (__v8di) _mm512_setzero_si512 (),	\
				 (__mmask8) (U)))
7066#endif
7067
7068extern __inline __m512i
7069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7070_mm512_and_si512 (__m512i __A, __m512i __B)
7071{
7072  return (__m512i) ((__v16su) __A & (__v16su) __B);
7073}
7074
7075extern __inline __m512i
7076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7077_mm512_and_epi32 (__m512i __A, __m512i __B)
7078{
7079  return (__m512i) ((__v16su) __A & (__v16su) __B);
7080}
7081
7082extern __inline __m512i
7083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7084_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7085{
7086  return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7087						 (__v16si) __B,
7088						 (__v16si) __W,
7089						 (__mmask16) __U);
7090}
7091
7092extern __inline __m512i
7093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7094_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7095{
7096  return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7097						 (__v16si) __B,
7098						 (__v16si)
7099						 _mm512_setzero_si512 (),
7100						 (__mmask16) __U);
7101}
7102
7103extern __inline __m512i
7104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7105_mm512_and_epi64 (__m512i __A, __m512i __B)
7106{
7107  return (__m512i) ((__v8du) __A & (__v8du) __B);
7108}
7109
7110extern __inline __m512i
7111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7112_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7113{
7114  return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7115						 (__v8di) __B,
7116						 (__v8di) __W, __U);
7117}
7118
7119extern __inline __m512i
7120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7121_mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7122{
7123  return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7124						 (__v8di) __B,
7125						 (__v8di)
7126						 _mm512_setzero_pd (),
7127						 __U);
7128}
7129
7130extern __inline __m512i
7131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7132_mm512_andnot_si512 (__m512i __A, __m512i __B)
7133{
7134  return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7135						  (__v16si) __B,
7136						  (__v16si)
7137						  _mm512_undefined_si512 (),
7138						  (__mmask16) -1);
7139}
7140
7141extern __inline __m512i
7142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7143_mm512_andnot_epi32 (__m512i __A, __m512i __B)
7144{
7145  return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7146						  (__v16si) __B,
7147						  (__v16si)
7148						  _mm512_undefined_si512 (),
7149						  (__mmask16) -1);
7150}
7151
7152extern __inline __m512i
7153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7154_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7155{
7156  return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7157						  (__v16si) __B,
7158						  (__v16si) __W,
7159						  (__mmask16) __U);
7160}
7161
7162extern __inline __m512i
7163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7164_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7165{
7166  return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7167						  (__v16si) __B,
7168						  (__v16si)
7169						  _mm512_setzero_si512 (),
7170						  (__mmask16) __U);
7171}
7172
7173extern __inline __m512i
7174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7175_mm512_andnot_epi64 (__m512i __A, __m512i __B)
7176{
7177  return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7178						  (__v8di) __B,
7179						  (__v8di)
7180						  _mm512_undefined_si512 (),
7181						  (__mmask8) -1);
7182}
7183
7184extern __inline __m512i
7185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7186_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7187{
7188  return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7189						  (__v8di) __B,
7190						  (__v8di) __W, __U);
7191}
7192
7193extern __inline __m512i
7194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7195_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7196{
7197  return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7198						  (__v8di) __B,
7199						  (__v8di)
7200						  _mm512_setzero_pd (),
7201						  __U);
7202}
7203
7204extern __inline __mmask16
7205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7206_mm512_test_epi32_mask (__m512i __A, __m512i __B)
7207{
7208  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7209						(__v16si) __B,
7210						(__mmask16) -1);
7211}
7212
7213extern __inline __mmask16
7214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7215_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7216{
7217  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7218						(__v16si) __B, __U);
7219}
7220
7221extern __inline __mmask8
7222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7223_mm512_test_epi64_mask (__m512i __A, __m512i __B)
7224{
7225  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7226					       (__v8di) __B,
7227					       (__mmask8) -1);
7228}
7229
7230extern __inline __mmask8
7231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7232_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7233{
7234  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7235}
7236
7237extern __inline __mmask16
7238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7239_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7240{
7241  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7242						 (__v16si) __B,
7243						 (__mmask16) -1);
7244}
7245
7246extern __inline __mmask16
7247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7248_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7249{
7250  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7251						 (__v16si) __B, __U);
7252}
7253
7254extern __inline __mmask8
7255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7256_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7257{
7258  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7259						(__v8di) __B,
7260						(__mmask8) -1);
7261}
7262
7263extern __inline __mmask8
7264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7265_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7266{
7267  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7268						(__v8di) __B, __U);
7269}
7270
7271extern __inline __m512i
7272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7273_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7274{
7275  return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7276						     (__v16si) __B,
7277						     (__v16si)
7278						     _mm512_undefined_si512 (),
7279						     (__mmask16) -1);
7280}
7281
7282extern __inline __m512i
7283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7284_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7285			    __m512i __B)
7286{
7287  return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7288						     (__v16si) __B,
7289						     (__v16si) __W,
7290						     (__mmask16) __U);
7291}
7292
7293extern __inline __m512i
7294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7295_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7296{
7297  return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7298						     (__v16si) __B,
7299						     (__v16si)
7300						     _mm512_setzero_si512 (),
7301						     (__mmask16) __U);
7302}
7303
7304extern __inline __m512i
7305__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7306_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7307{
7308  return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7309						      (__v8di) __B,
7310						      (__v8di)
7311						      _mm512_undefined_si512 (),
7312						      (__mmask8) -1);
7313}
7314
7315extern __inline __m512i
7316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7317_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7318{
7319  return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7320						      (__v8di) __B,
7321						      (__v8di) __W,
7322						      (__mmask8) __U);
7323}
7324
7325extern __inline __m512i
7326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7327_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7328{
7329  return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7330						      (__v8di) __B,
7331						      (__v8di)
7332						      _mm512_setzero_si512 (),
7333						      (__mmask8) __U);
7334}
7335
7336extern __inline __m512i
7337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7338_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7339{
7340  return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7341						     (__v16si) __B,
7342						     (__v16si)
7343						     _mm512_undefined_si512 (),
7344						     (__mmask16) -1);
7345}
7346
7347extern __inline __m512i
7348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7349_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7350			    __m512i __B)
7351{
7352  return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7353						     (__v16si) __B,
7354						     (__v16si) __W,
7355						     (__mmask16) __U);
7356}
7357
7358extern __inline __m512i
7359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7360_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7361{
7362  return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7363						     (__v16si) __B,
7364						     (__v16si)
7365						     _mm512_setzero_si512 (),
7366						     (__mmask16) __U);
7367}
7368
7369extern __inline __m512i
7370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7371_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7372{
7373  return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7374						      (__v8di) __B,
7375						      (__v8di)
7376						      _mm512_undefined_si512 (),
7377						      (__mmask8) -1);
7378}
7379
7380extern __inline __m512i
7381__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7382_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7383{
7384  return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7385						      (__v8di) __B,
7386						      (__v8di) __W,
7387						      (__mmask8) __U);
7388}
7389
7390extern __inline __m512i
7391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7392_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7393{
7394  return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7395						      (__v8di) __B,
7396						      (__v8di)
7397						      _mm512_setzero_si512 (),
7398						      (__mmask8) __U);
7399}
7400
7401#ifdef __x86_64__
7402#ifdef __OPTIMIZE__
7403extern __inline unsigned long long
7404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7405_mm_cvt_roundss_u64 (__m128 __A, const int __R)
7406{
7407  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7408}
7409
7410extern __inline long long
7411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7412_mm_cvt_roundss_si64 (__m128 __A, const int __R)
7413{
7414  return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7415}
7416
7417extern __inline long long
7418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7419_mm_cvt_roundss_i64 (__m128 __A, const int __R)
7420{
7421  return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7422}
7423
7424extern __inline unsigned long long
7425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7426_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7427{
7428  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7429}
7430
7431extern __inline long long
7432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7433_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7434{
7435  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7436}
7437
7438extern __inline long long
7439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7440_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7441{
7442  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7443}
7444#else
/* Macro forms of the scalar float -> 64-bit integer conversions, used
   when __OPTIMIZE__ is not defined.  A is the source vector and B is
   forwarded as the builtin's second operand.  */
#define _mm_cvt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtss2usi64 ((A), (B)))

#define _mm_cvt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 ((A), (B)))

#define _mm_cvt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 ((A), (B)))

#define _mm_cvtt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttss2usi64 ((A), (B)))

#define _mm_cvtt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 ((A), (B)))

#define _mm_cvtt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 ((A), (B)))
7462#endif
7463#endif
7464
7465#ifdef __OPTIMIZE__
7466extern __inline unsigned
7467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7468_mm_cvt_roundss_u32 (__m128 __A, const int __R)
7469{
7470  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7471}
7472
7473extern __inline int
7474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7475_mm_cvt_roundss_si32 (__m128 __A, const int __R)
7476{
7477  return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7478}
7479
7480extern __inline int
7481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7482_mm_cvt_roundss_i32 (__m128 __A, const int __R)
7483{
7484  return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7485}
7486
7487extern __inline unsigned
7488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7489_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7490{
7491  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7492}
7493
7494extern __inline int
7495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7496_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7497{
7498  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7499}
7500
7501extern __inline int
7502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7503_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7504{
7505  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7506}
7507#else
/* Macro forms of the scalar float -> 32-bit integer conversions, used
   when __OPTIMIZE__ is not defined.  A is the source vector and B is
   forwarded as the builtin's second operand.  */
#define _mm_cvt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtss2usi32 ((A), (B)))

#define _mm_cvt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 ((A), (B)))

#define _mm_cvt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 ((A), (B)))

#define _mm_cvtt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttss2usi32 ((A), (B)))

#define _mm_cvtt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 ((A), (B)))

#define _mm_cvtt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 ((A), (B)))
7525#endif
7526
7527#ifdef __x86_64__
7528#ifdef __OPTIMIZE__
7529extern __inline unsigned long long
7530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7531_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7532{
7533  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7534}
7535
7536extern __inline long long
7537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7538_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7539{
7540  return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7541}
7542
7543extern __inline long long
7544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7545_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7546{
7547  return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7548}
7549
7550extern __inline unsigned long long
7551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7552_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7553{
7554  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7555}
7556
7557extern __inline long long
7558__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7559_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7560{
7561  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7562}
7563
7564extern __inline long long
7565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7566_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7567{
7568  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7569}
7570#else
/* Macro forms of the scalar double -> 64-bit integer conversions, used
   when __OPTIMIZE__ is not defined.  A is the source vector and B is
   forwarded as the builtin's second operand.  */
#define _mm_cvt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtsd2usi64 ((A), (B)))

#define _mm_cvt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 ((A), (B)))

#define _mm_cvt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 ((A), (B)))

#define _mm_cvtt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttsd2usi64 ((A), (B)))

#define _mm_cvtt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 ((A), (B)))

#define _mm_cvtt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 ((A), (B)))
7588#endif
7589#endif
7590
7591#ifdef __OPTIMIZE__
7592extern __inline unsigned
7593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7594_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7595{
7596  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7597}
7598
7599extern __inline int
7600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7601_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7602{
7603  return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7604}
7605
7606extern __inline int
7607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7608_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7609{
7610  return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7611}
7612
7613extern __inline unsigned
7614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7615_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7616{
7617  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7618}
7619
7620extern __inline int
7621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7622_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7623{
7624  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7625}
7626
7627extern __inline int
7628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7629_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7630{
7631  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7632}
7633#else
/* Macro forms of the scalar double -> 32-bit integer conversions, used
   when __OPTIMIZE__ is not defined.  A is the source vector and B is
   forwarded as the builtin's second operand.  */
#define _mm_cvt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtsd2usi32 ((A), (B)))

#define _mm_cvt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))

#define _mm_cvt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))

#define _mm_cvtt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttsd2usi32 ((A), (B)))

#define _mm_cvtt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))

#define _mm_cvtt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))
7651#endif
7652
7653extern __inline __m512d
7654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7655_mm512_movedup_pd (__m512d __A)
7656{
7657  return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7658						   (__v8df)
7659						   _mm512_undefined_pd (),
7660						   (__mmask8) -1);
7661}
7662
7663extern __inline __m512d
7664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7665_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
7666{
7667  return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7668						   (__v8df) __W,
7669						   (__mmask8) __U);
7670}
7671
7672extern __inline __m512d
7673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7674_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
7675{
7676  return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7677						   (__v8df)
7678						   _mm512_setzero_pd (),
7679						   (__mmask8) __U);
7680}
7681
7682extern __inline __m512d
7683__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7684_mm512_unpacklo_pd (__m512d __A, __m512d __B)
7685{
7686  return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7687						    (__v8df) __B,
7688						    (__v8df)
7689						    _mm512_undefined_pd (),
7690						    (__mmask8) -1);
7691}
7692
7693extern __inline __m512d
7694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7695_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7696{
7697  return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7698						    (__v8df) __B,
7699						    (__v8df) __W,
7700						    (__mmask8) __U);
7701}
7702
7703extern __inline __m512d
7704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7705_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
7706{
7707  return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7708						    (__v8df) __B,
7709						    (__v8df)
7710						    _mm512_setzero_pd (),
7711						    (__mmask8) __U);
7712}
7713
7714extern __inline __m512d
7715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7716_mm512_unpackhi_pd (__m512d __A, __m512d __B)
7717{
7718  return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7719						    (__v8df) __B,
7720						    (__v8df)
7721						    _mm512_undefined_pd (),
7722						    (__mmask8) -1);
7723}
7724
7725extern __inline __m512d
7726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7727_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7728{
7729  return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7730						    (__v8df) __B,
7731						    (__v8df) __W,
7732						    (__mmask8) __U);
7733}
7734
7735extern __inline __m512d
7736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7737_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
7738{
7739  return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7740						    (__v8df) __B,
7741						    (__v8df)
7742						    _mm512_setzero_pd (),
7743						    (__mmask8) __U);
7744}
7745
7746extern __inline __m512
7747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7748_mm512_unpackhi_ps (__m512 __A, __m512 __B)
7749{
7750  return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7751						   (__v16sf) __B,
7752						   (__v16sf)
7753						   _mm512_undefined_ps (),
7754						   (__mmask16) -1);
7755}
7756
7757extern __inline __m512
7758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7759_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7760{
7761  return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7762						   (__v16sf) __B,
7763						   (__v16sf) __W,
7764						   (__mmask16) __U);
7765}
7766
7767extern __inline __m512
7768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7769_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
7770{
7771  return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7772						   (__v16sf) __B,
7773						   (__v16sf)
7774						   _mm512_setzero_ps (),
7775						   (__mmask16) __U);
7776}
7777
7778#ifdef __OPTIMIZE__
7779extern __inline __m512d
7780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7781_mm512_cvt_roundps_pd (__m256 __A, const int __R)
7782{
7783  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7784						    (__v8df)
7785						    _mm512_undefined_pd (),
7786						    (__mmask8) -1, __R);
7787}
7788
7789extern __inline __m512d
7790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7791_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
7792			    const int __R)
7793{
7794  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7795						    (__v8df) __W,
7796						    (__mmask8) __U, __R);
7797}
7798
7799extern __inline __m512d
7800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7801_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
7802{
7803  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7804						    (__v8df)
7805						    _mm512_setzero_pd (),
7806						    (__mmask8) __U, __R);
7807}
7808
7809extern __inline __m512
7810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7811_mm512_cvt_roundph_ps (__m256i __A, const int __R)
7812{
7813  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7814						    (__v16sf)
7815						    _mm512_undefined_ps (),
7816						    (__mmask16) -1, __R);
7817}
7818
7819extern __inline __m512
7820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7821_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
7822			    const int __R)
7823{
7824  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7825						    (__v16sf) __W,
7826						    (__mmask16) __U, __R);
7827}
7828
7829extern __inline __m512
7830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7831_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
7832{
7833  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7834						    (__v16sf)
7835						    _mm512_setzero_ps (),
7836						    (__mmask16) __U, __R);
7837}
7838
7839extern __inline __m256i
7840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7841_mm512_cvt_roundps_ph (__m512 __A, const int __I)
7842{
7843  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7844						     __I,
7845						     (__v16hi)
7846						     _mm256_undefined_si256 (),
7847						     -1);
7848}
7849
7850extern __inline __m256i
7851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7852_mm512_cvtps_ph (__m512 __A, const int __I)
7853{
7854  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7855						     __I,
7856						     (__v16hi)
7857						     _mm256_undefined_si256 (),
7858						     -1);
7859}
7860
7861extern __inline __m256i
7862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7863_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
7864			    const int __I)
7865{
7866  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7867						     __I,
7868						     (__v16hi) __U,
7869						     (__mmask16) __W);
7870}
7871
7872extern __inline __m256i
7873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7874_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
7875{
7876  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7877						     __I,
7878						     (__v16hi) __U,
7879						     (__mmask16) __W);
7880}
7881
7882extern __inline __m256i
7883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7884_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
7885{
7886  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7887						     __I,
7888						     (__v16hi)
7889						     _mm256_setzero_si256 (),
7890						     (__mmask16) __W);
7891}
7892
7893extern __inline __m256i
7894__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7895_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
7896{
7897  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7898						     __I,
7899						     (__v16hi)
7900						     _mm256_setzero_si256 (),
7901						     (__mmask16) __W);
7902}
7903#else
/* Macro forms of the ps<->pd and ps<->ph conversions, used when
   __OPTIMIZE__ is not defined.  Every expansion is wrapped in
   parentheses so it behaves as a single primary expression, and every
   macro argument that sits under a cast is parenthesized so the cast
   applies to the whole argument rather than to its first operand.  */
#define _mm512_cvt_roundps_pd(A, B)		 \
    ((__m512d)__builtin_ia32_cvtps2pd512_mask((A), (__v8df)_mm512_undefined_pd(), -1, (B)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, B)   \
    ((__m512d)__builtin_ia32_cvtps2pd512_mask((A), (__v8df)(W), (U), (B)))

#define _mm512_maskz_cvt_roundps_pd(U, A, B)     \
    ((__m512d)__builtin_ia32_cvtps2pd512_mask((A), (__v8df)_mm512_setzero_pd(), (U), (B)))

#define _mm512_cvt_roundph_ps(A, B)		 \
    ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, (B)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, B)   \
    ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), (U), (B)))

#define _mm512_maskz_cvt_roundph_ps(U, A, B)     \
    ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), (U), (B)))

/* In the ps->ph macros below, U names the pass-through vector and W the
   mask, mirroring the parameter names of the inline definitions.  */
#define _mm512_cvt_roundps_ph(A, I)						 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)_mm256_undefined_si256 (), -1))
#define _mm512_cvtps_ph(A, I)						 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)_mm256_undefined_si256 (), -1))
#define _mm512_mask_cvt_roundps_ph(U, W, A, I)				 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)(__m256i)(U), (__mmask16) (W)))
#define _mm512_mask_cvtps_ph(U, W, A, I)				 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)(__m256i)(U), (__mmask16) (W)))
#define _mm512_maskz_cvt_roundps_ph(W, A, I)					 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
#define _mm512_maskz_cvtps_ph(W, A, I)					 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
7940#endif
7941
7942#ifdef __OPTIMIZE__
7943extern __inline __m256
7944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7945_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
7946{
7947  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7948						   (__v8sf)
7949						   _mm256_undefined_ps (),
7950						   (__mmask8) -1, __R);
7951}
7952
7953extern __inline __m256
7954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7955_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
7956			    const int __R)
7957{
7958  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7959						   (__v8sf) __W,
7960						   (__mmask8) __U, __R);
7961}
7962
7963extern __inline __m256
7964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7965_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
7966{
7967  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7968						   (__v8sf)
7969						   _mm256_setzero_ps (),
7970						   (__mmask8) __U, __R);
7971}
7972
7973extern __inline __m128
7974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7975_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
7976{
7977  return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
7978						 (__v2df) __B,
7979						 __R);
7980}
7981
7982extern __inline __m128d
7983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7984_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
7985{
7986  return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
7987						  (__v4sf) __B,
7988						  __R);
7989}
7990#else
/* Macro forms of the pd->ps and scalar sd<->ss conversions, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in parentheses
   so that a postfix operator applied to the macro (e.g. a subscript)
   binds to the cast result, not to the builtin call underneath it.  */
#define _mm512_cvt_roundpd_ps(A, B)		 \
    ((__m256)__builtin_ia32_cvtpd2ps512_mask((A), (__v8sf)_mm256_undefined_ps(), -1, (B)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B)   \
    ((__m256)__builtin_ia32_cvtpd2ps512_mask((A), (__v8sf)(W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, B)     \
    ((__m256)__builtin_ia32_cvtpd2ps512_mask((A), (__v8sf)_mm256_setzero_ps(), (U), (B)))

#define _mm_cvt_roundsd_ss(A, B, C)		 \
    ((__m128)__builtin_ia32_cvtsd2ss_round((A), (B), (C)))

#define _mm_cvt_roundss_sd(A, B, C)		 \
    ((__m128d)__builtin_ia32_cvtss2sd_round((A), (B), (C)))
8005#endif
8006
8007extern __inline void
8008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8009_mm512_stream_si512 (__m512i * __P, __m512i __A)
8010{
8011  __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8012}
8013
8014extern __inline void
8015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8016_mm512_stream_ps (float *__P, __m512 __A)
8017{
8018  __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8019}
8020
8021extern __inline void
8022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8023_mm512_stream_pd (double *__P, __m512d __A)
8024{
8025  __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8026}
8027
8028extern __inline __m512i
8029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8030_mm512_stream_load_si512 (void *__P)
8031{
8032  return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8033}
8034
/* Constants for mantissa extraction: normalization interval selector.
   The numeric values are the implicit ones (0..3), spelled out here.  */
typedef enum
{
  _MM_MANT_NORM_1_2 = 0,	/* interval [1, 2)      */
  _MM_MANT_NORM_p5_2 = 1,	/* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1 = 2,	/* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5 = 3	/* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
8043
/* Sign-control selector for mantissa extraction.  The numeric values are
   the implicit ones (0..2), spelled out here.  */
typedef enum
{
  _MM_MANT_SIGN_src = 0,	/* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero = 1,	/* sign = 0             */
  _MM_MANT_SIGN_nan = 2		/* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8050
8051#ifdef __OPTIMIZE__
8052extern __inline __m128
8053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8054_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8055{
8056  return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8057						    (__v4sf) __B,
8058						    __R);
8059}
8060
8061extern __inline __m128d
8062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8063_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8064{
8065  return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8066						     (__v2df) __B,
8067						     __R);
8068}
8069
8070extern __inline __m512
8071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8072_mm512_getexp_round_ps (__m512 __A, const int __R)
8073{
8074  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8075						   (__v16sf)
8076						   _mm512_undefined_ps (),
8077						   (__mmask16) -1, __R);
8078}
8079
8080extern __inline __m512
8081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8082_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8083			     const int __R)
8084{
8085  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8086						   (__v16sf) __W,
8087						   (__mmask16) __U, __R);
8088}
8089
8090extern __inline __m512
8091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8092_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8093{
8094  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8095						   (__v16sf)
8096						   _mm512_setzero_ps (),
8097						   (__mmask16) __U, __R);
8098}
8099
8100extern __inline __m512d
8101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8102_mm512_getexp_round_pd (__m512d __A, const int __R)
8103{
8104  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8105						    (__v8df)
8106						    _mm512_undefined_pd (),
8107						    (__mmask8) -1, __R);
8108}
8109
8110extern __inline __m512d
8111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8112_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8113			     const int __R)
8114{
8115  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8116						    (__v8df) __W,
8117						    (__mmask8) __U, __R);
8118}
8119
8120extern __inline __m512d
8121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8122_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8123{
8124  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8125						    (__v8df)
8126						    _mm512_setzero_pd (),
8127						    (__mmask8) __U, __R);
8128}
8129
8130extern __inline __m512d
8131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8132_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8133			 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8134{
8135  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8136						     (__C << 2) | __B,
8137						     _mm512_undefined_pd (),
8138						     (__mmask8) -1, __R);
8139}
8140
8141extern __inline __m512d
8142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8143_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8144			      _MM_MANTISSA_NORM_ENUM __B,
8145			      _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8146{
8147  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8148						     (__C << 2) | __B,
8149						     (__v8df) __W, __U,
8150						     __R);
8151}
8152
8153extern __inline __m512d
8154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8155_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8156			       _MM_MANTISSA_NORM_ENUM __B,
8157			       _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8158{
8159  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8160						     (__C << 2) | __B,
8161						     (__v8df)
8162						     _mm512_setzero_pd (),
8163						     __U, __R);
8164}
8165
8166extern __inline __m512
8167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8168_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8169			 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8170{
8171  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8172						    (__C << 2) | __B,
8173						    _mm512_undefined_ps (),
8174						    (__mmask16) -1, __R);
8175}
8176
8177extern __inline __m512
8178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8179_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8180			      _MM_MANTISSA_NORM_ENUM __B,
8181			      _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8182{
8183  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8184						    (__C << 2) | __B,
8185						    (__v16sf) __W, __U,
8186						    __R);
8187}
8188
8189extern __inline __m512
8190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8191_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8192			       _MM_MANTISSA_NORM_ENUM __B,
8193			       _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8194{
8195  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8196						    (__C << 2) | __B,
8197						    (__v16sf)
8198						    _mm512_setzero_ps (),
8199						    __U, __R);
8200}
8201
8202extern __inline __m128d
8203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8204_mm_getmant_round_sd (__m128d __A, __m128d __B,
8205		      _MM_MANTISSA_NORM_ENUM __C,
8206		      _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8207{
8208  return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8209						  (__v2df) __B,
8210						  (__D << 2) | __C,
8211						   __R);
8212}
8213
8214extern __inline __m128
8215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8216_mm_getmant_round_ss (__m128 __A, __m128 __B,
8217		      _MM_MANTISSA_NORM_ENUM __C,
8218		      _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8219{
8220  return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8221						  (__v4sf) __B,
8222						  (__D << 2) | __C,
8223						  __R);
8224}
8225
8226#else
/* Macro forms of the getmant intrinsics, used when __OPTIMIZE__ is not
   defined.  The 512-bit forms build the immediate as
   (int) (((C) << 2) | (B)); the scalar forms as
   (int) (((D) << 2) | (C)).  */
#define _mm512_getmant_round_pd(X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) (__m512d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v8df) (__m512d) _mm512_undefined_pd (), \
    (__mmask8) -1, \
    (R)))

#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) (__m512d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v8df) (__m512d) (W), \
    (__mmask8) (U), \
    (R)))

#define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) (__m512d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v8df) (__m512d) _mm512_setzero_pd (), \
    (__mmask8) (U), \
    (R)))

#define _mm512_getmant_round_ps(X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf) (__m512) (X), \
    (int) (((C) << 2) | (B)), \
    (__v16sf) (__m512) _mm512_undefined_ps (), \
    (__mmask16) -1, \
    (R)))

#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf) (__m512) (X), \
    (int) (((C) << 2) | (B)), \
    (__v16sf) (__m512) (W), \
    (__mmask16) (U), \
    (R)))

#define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf) (__m512) (X), \
    (int) (((C) << 2) | (B)), \
    (__v16sf) (__m512) _mm512_setzero_ps (), \
    (__mmask16) (U), \
    (R)))

#define _mm_getmant_round_sd(X, Y, C, D, R) \
  ((__m128d) __builtin_ia32_getmantsd_round ((__v2df) (__m128d) (X), \
    (__v2df) (__m128d) (Y), \
    (int) (((D) << 2) | (C)), \
    (R)))

#define _mm_getmant_round_ss(X, Y, C, D, R) \
  ((__m128) __builtin_ia32_getmantss_round ((__v4sf) (__m128) (X), \
    (__v4sf) (__m128) (Y), \
    (int) (((D) << 2) | (C)), \
    (R)))
8278
/* Macro forms of the getexp intrinsics, used when __OPTIMIZE__ is not
   defined.  Every macro parameter, including R, is parenthesized where
   it is substituted so that an argument which itself expands to a
   comma-separated list cannot change the number of arguments handed to
   the builtin.  */
#define _mm_getexp_round_ss(A, B, R) \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf) (__m128) (A), \
    (__v4sf) (__m128) (B), (R)))

#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df) (__m128d) (A), \
    (__v2df) (__m128d) (B), (R)))

#define _mm512_getexp_round_ps(A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
    (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
    (__v16sf) (__m512) (W), (__mmask16) (U), (R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
    (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (R)))

#define _mm512_getexp_round_pd(A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
    (__v8df) _mm512_undefined_pd (), (__mmask8) -1, (R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
    (__v8df) (__m512d) (W), (__mmask8) (U), (R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
    (__v8df) _mm512_setzero_pd (), (__mmask8) (U), (R)))
8308#endif
8309
8310#ifdef __OPTIMIZE__
8311extern __inline __m512
8312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8313_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8314{
8315  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8316						  (__v16sf)
8317						  _mm512_undefined_ps (),
8318						  -1, __R);
8319}
8320
8321extern __inline __m512
8322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8323_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8324				 const int __imm, const int __R)
8325{
8326  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8327						  (__v16sf) __A,
8328						  (__mmask16) __B, __R);
8329}
8330
8331extern __inline __m512
8332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8333_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8334				  const int __imm, const int __R)
8335{
8336  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8337						  __imm,
8338						  (__v16sf)
8339						  _mm512_setzero_ps (),
8340						  (__mmask16) __A, __R);
8341}
8342
8343extern __inline __m512d
8344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8345_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8346{
8347  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8348						   (__v8df)
8349						   _mm512_undefined_pd (),
8350						   -1, __R);
8351}
8352
8353extern __inline __m512d
8354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8355_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8356				 __m512d __C, const int __imm, const int __R)
8357{
8358  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8359						   (__v8df) __A,
8360						   (__mmask8) __B, __R);
8361}
8362
8363extern __inline __m512d
8364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8365_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8366				  const int __imm, const int __R)
8367{
8368  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8369						   __imm,
8370						   (__v8df)
8371						   _mm512_setzero_pd (),
8372						   (__mmask8) __A, __R);
8373}
8374
8375extern __inline __m128
8376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8377_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8378{
8379  return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8380						   (__v4sf) __B, __imm, __R);
8381}
8382
8383extern __inline __m128d
8384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8385_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8386			 const int __R)
8387{
8388  return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8389						    (__v2df) __B, __imm, __R);
8390}
8391
8392#else
/* Macro forms of the roundscale intrinsics, used when __OPTIMIZE__ is
   not defined.  Every macro parameter, including R, is parenthesized
   where it is substituted so that an argument which itself expands to
   a comma-separated list cannot change the number of arguments handed
   to the builtin.  */
#define _mm512_roundscale_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (A), \
    (int) (B), \
    (__v16sf) _mm512_undefined_ps (), \
    (__mmask16) (-1), (R)))

#define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (C), \
    (int) (D), \
    (__v16sf) (__m512) (A), \
    (__mmask16) (B), (R)))

#define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (B), \
    (int) (C), \
    (__v16sf) _mm512_setzero_ps (), \
    (__mmask16) (A), (R)))

#define _mm512_roundscale_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (A), \
    (int) (B), \
    (__v8df) _mm512_undefined_pd (), \
    (__mmask8) (-1), (R)))

#define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (C), \
    (int) (D), \
    (__v8df) (__m512d) (A), \
    (__mmask8) (B), (R)))

#define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (B), \
    (int) (C), \
    (__v8df) _mm512_setzero_pd (), \
    (__mmask8) (A), (R)))

#define _mm_roundscale_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf) (__m128) (A), \
    (__v4sf) (__m128) (B), (int) (C), (R)))

#define _mm_roundscale_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df) (__m128d) (A), \
    (__v2df) (__m128d) (B), (int) (C), (R)))
8425#endif
8426
8427extern __inline __m512
8428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8429_mm512_floor_ps (__m512 __A)
8430{
8431  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8432						  _MM_FROUND_FLOOR,
8433						  (__v16sf) __A, -1,
8434						  _MM_FROUND_CUR_DIRECTION);
8435}
8436
8437extern __inline __m512d
8438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8439_mm512_floor_pd (__m512d __A)
8440{
8441  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8442						   _MM_FROUND_FLOOR,
8443						   (__v8df) __A, -1,
8444						   _MM_FROUND_CUR_DIRECTION);
8445}
8446
8447extern __inline __m512
8448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8449_mm512_ceil_ps (__m512 __A)
8450{
8451  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8452						  _MM_FROUND_CEIL,
8453						  (__v16sf) __A, -1,
8454						  _MM_FROUND_CUR_DIRECTION);
8455}
8456
8457extern __inline __m512d
8458__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8459_mm512_ceil_pd (__m512d __A)
8460{
8461  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8462						   _MM_FROUND_CEIL,
8463						   (__v8df) __A, -1,
8464						   _MM_FROUND_CUR_DIRECTION);
8465}
8466
8467extern __inline __m512
8468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8469_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8470{
8471  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8472						  _MM_FROUND_FLOOR,
8473						  (__v16sf) __W, __U,
8474						  _MM_FROUND_CUR_DIRECTION);
8475}
8476
8477extern __inline __m512d
8478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8479_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8480{
8481  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8482						   _MM_FROUND_FLOOR,
8483						   (__v8df) __W, __U,
8484						   _MM_FROUND_CUR_DIRECTION);
8485}
8486
8487extern __inline __m512
8488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8489_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8490{
8491  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8492						  _MM_FROUND_CEIL,
8493						  (__v16sf) __W, __U,
8494						  _MM_FROUND_CUR_DIRECTION);
8495}
8496
8497extern __inline __m512d
8498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8499_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8500{
8501  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8502						   _MM_FROUND_CEIL,
8503						   (__v8df) __W, __U,
8504						   _MM_FROUND_CUR_DIRECTION);
8505}
8506
8507#ifdef __OPTIMIZE__
8508extern __inline __m512i
8509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8510_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8511{
8512  return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8513						  (__v16si) __B, __imm,
8514						  (__v16si)
8515						  _mm512_undefined_si512 (),
8516						  (__mmask16) -1);
8517}
8518
8519extern __inline __m512i
8520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8521_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8522			  __m512i __B, const int __imm)
8523{
8524  return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8525						  (__v16si) __B, __imm,
8526						  (__v16si) __W,
8527						  (__mmask16) __U);
8528}
8529
8530extern __inline __m512i
8531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8532_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8533			   const int __imm)
8534{
8535  return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8536						  (__v16si) __B, __imm,
8537						  (__v16si)
8538						  _mm512_setzero_si512 (),
8539						  (__mmask16) __U);
8540}
8541
8542extern __inline __m512i
8543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8544_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8545{
8546  return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8547						  (__v8di) __B, __imm,
8548						  (__v8di)
8549						  _mm512_undefined_si512 (),
8550						  (__mmask8) -1);
8551}
8552
8553extern __inline __m512i
8554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8555_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8556			  __m512i __B, const int __imm)
8557{
8558  return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8559						  (__v8di) __B, __imm,
8560						  (__v8di) __W,
8561						  (__mmask8) __U);
8562}
8563
8564extern __inline __m512i
8565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8566_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8567			   const int __imm)
8568{
8569  return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8570						  (__v8di) __B, __imm,
8571						  (__v8di)
8572						  _mm512_setzero_si512 (),
8573						  (__mmask8) __U);
8574}
8575#else
/* Macro forms of the alignr intrinsics, used when __OPTIMIZE__ is not
   defined.  Each forwards X, Y and the immediate C to the alignd/alignq
   builtin together with a fourth vector operand and a mask.  */
#define _mm512_alignr_epi32(X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ((__v16si) (__m512i) (X), \
    (__v16si) (__m512i) (Y), \
    (int) (C), \
    (__v16si) _mm512_undefined_si512 (), \
    (__mmask16) -1))

#define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ((__v16si) (__m512i) (X), \
    (__v16si) (__m512i) (Y), \
    (int) (C), \
    (__v16si) (__m512i) (W), \
    (__mmask16) (U)))

#define _mm512_maskz_alignr_epi32(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ((__v16si) (__m512i) (X), \
    (__v16si) (__m512i) (Y), \
    (int) (C), \
    (__v16si) _mm512_setzero_si512 (), \
    (__mmask16) (U)))

#define _mm512_alignr_epi64(X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ((__v8di) (__m512i) (X), \
    (__v8di) (__m512i) (Y), \
    (int) (C), \
    (__v8di) _mm512_undefined_si512 (), \
    (__mmask8) -1))

#define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ((__v8di) (__m512i) (X), \
    (__v8di) (__m512i) (Y), \
    (int) (C), \
    (__v8di) (__m512i) (W), \
    (__mmask8) (U)))

#define _mm512_maskz_alignr_epi64(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ((__v8di) (__m512i) (X), \
    (__v8di) (__m512i) (Y), \
    (int) (C), \
    (__v8di) _mm512_setzero_si512 (), \
    (__mmask8) (U)))
8604#endif
8605
8606extern __inline __mmask16
8607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8608_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8609{
8610  return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8611						     (__v16si) __B,
8612						     (__mmask16) -1);
8613}
8614
8615extern __inline __mmask16
8616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8617_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8618{
8619  return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8620						     (__v16si) __B, __U);
8621}
8622
8623extern __inline __mmask8
8624__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8625_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8626{
8627  return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8628						    (__v8di) __B, __U);
8629}
8630
8631extern __inline __mmask8
8632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8633_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8634{
8635  return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8636						    (__v8di) __B,
8637						    (__mmask8) -1);
8638}
8639
8640extern __inline __mmask16
8641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8642_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
8643{
8644  return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8645						     (__v16si) __B,
8646						     (__mmask16) -1);
8647}
8648
8649extern __inline __mmask16
8650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8651_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8652{
8653  return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8654						     (__v16si) __B, __U);
8655}
8656
8657extern __inline __mmask8
8658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8659_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8660{
8661  return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8662						    (__v8di) __B, __U);
8663}
8664
8665extern __inline __mmask8
8666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8667_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
8668{
8669  return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8670						    (__v8di) __B,
8671						    (__mmask8) -1);
8672}
8673
8674extern __inline __mmask16
8675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8676_mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
8677{
8678  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8679						    (__v16si) __Y, 5,
8680						    (__mmask16) -1);
8681}
8682
8683extern __inline __mmask16
8684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8685_mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8686{
8687  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8688						    (__v16si) __Y, 5,
8689						    (__mmask16) __M);
8690}
8691
8692extern __inline __mmask16
8693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8694_mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8695{
8696  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8697						    (__v16si) __Y, 5,
8698						    (__mmask16) __M);
8699}
8700
8701extern __inline __mmask16
8702__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8703_mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
8704{
8705  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8706						    (__v16si) __Y, 5,
8707						    (__mmask16) -1);
8708}
8709
8710extern __inline __mmask8
8711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8712_mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8713{
8714  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8715						    (__v8di) __Y, 5,
8716						    (__mmask8) __M);
8717}
8718
8719extern __inline __mmask8
8720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8721_mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
8722{
8723  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8724						    (__v8di) __Y, 5,
8725						    (__mmask8) -1);
8726}
8727
8728extern __inline __mmask8
8729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8730_mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8731{
8732  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8733						    (__v8di) __Y, 5,
8734						    (__mmask8) __M);
8735}
8736
8737extern __inline __mmask8
8738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8739_mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
8740{
8741  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8742						    (__v8di) __Y, 5,
8743						    (__mmask8) -1);
8744}
8745
8746extern __inline __mmask16
8747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8748_mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8749{
8750  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8751						    (__v16si) __Y, 2,
8752						    (__mmask16) __M);
8753}
8754
8755extern __inline __mmask16
8756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8757_mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
8758{
8759  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8760						    (__v16si) __Y, 2,
8761						    (__mmask16) -1);
8762}
8763
8764extern __inline __mmask16
8765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8766_mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8767{
8768  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8769						    (__v16si) __Y, 2,
8770						    (__mmask16) __M);
8771}
8772
8773extern __inline __mmask16
8774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8775_mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
8776{
8777  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8778						    (__v16si) __Y, 2,
8779						    (__mmask16) -1);
8780}
8781
8782extern __inline __mmask8
8783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8784_mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8785{
8786  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8787						    (__v8di) __Y, 2,
8788						    (__mmask8) __M);
8789}
8790
8791extern __inline __mmask8
8792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8793_mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
8794{
8795  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8796						    (__v8di) __Y, 2,
8797						    (__mmask8) -1);
8798}
8799
8800extern __inline __mmask8
8801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8802_mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8803{
8804  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8805						    (__v8di) __Y, 2,
8806						    (__mmask8) __M);
8807}
8808
8809extern __inline __mmask8
8810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8811_mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
8812{
8813  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8814						    (__v8di) __Y, 2,
8815						    (__mmask8) -1);
8816}
8817
8818extern __inline __mmask16
8819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8820_mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8821{
8822  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8823						    (__v16si) __Y, 1,
8824						    (__mmask16) __M);
8825}
8826
8827extern __inline __mmask16
8828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8829_mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
8830{
8831  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8832						    (__v16si) __Y, 1,
8833						    (__mmask16) -1);
8834}
8835
8836extern __inline __mmask16
8837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8838_mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8839{
8840  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8841						    (__v16si) __Y, 1,
8842						    (__mmask16) __M);
8843}
8844
8845extern __inline __mmask16
8846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8847_mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
8848{
8849  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8850						    (__v16si) __Y, 1,
8851						    (__mmask16) -1);
8852}
8853
8854extern __inline __mmask8
8855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8856_mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8857{
8858  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8859						    (__v8di) __Y, 1,
8860						    (__mmask8) __M);
8861}
8862
8863extern __inline __mmask8
8864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8865_mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
8866{
8867  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8868						    (__v8di) __Y, 1,
8869						    (__mmask8) -1);
8870}
8871
8872extern __inline __mmask8
8873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8874_mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8875{
8876  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8877						    (__v8di) __Y, 1,
8878						    (__mmask8) __M);
8879}
8880
8881extern __inline __mmask8
8882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8883_mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
8884{
8885  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8886						    (__v8di) __Y, 1,
8887						    (__mmask8) -1);
8888}
8889
8890extern __inline __mmask16
8891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8892_mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
8893{
8894  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8895						    (__v16si) __Y, 4,
8896						    (__mmask16) -1);
8897}
8898
8899extern __inline __mmask16
8900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8901_mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8902{
8903  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8904						    (__v16si) __Y, 4,
8905						    (__mmask16) __M);
8906}
8907
8908extern __inline __mmask16
8909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8910_mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8911{
8912  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8913						    (__v16si) __Y, 4,
8914						    (__mmask16) __M);
8915}
8916
8917extern __inline __mmask16
8918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8919_mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
8920{
8921  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8922						    (__v16si) __Y, 4,
8923						    (__mmask16) -1);
8924}
8925
8926extern __inline __mmask8
8927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8928_mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8929{
8930  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8931						    (__v8di) __Y, 4,
8932						    (__mmask8) __M);
8933}
8934
8935extern __inline __mmask8
8936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8937_mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
8938{
8939  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8940						    (__v8di) __Y, 4,
8941						    (__mmask8) -1);
8942}
8943
8944extern __inline __mmask8
8945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8946_mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8947{
8948  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8949						    (__v8di) __Y, 4,
8950						    (__mmask8) __M);
8951}
8952
8953extern __inline __mmask8
8954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8955_mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
8956{
8957  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8958						    (__v8di) __Y, 4,
8959						    (__mmask8) -1);
8960}
8961
/* Named predicate values for the integer compare-to-mask intrinsics.  The
   fixed-predicate wrappers earlier in this header pass 1 (LT) and 4 (NE)
   as literals.  */
#define _MM_CMPINT_EQ       0x0
#define _MM_CMPINT_LT       0x1
#define _MM_CMPINT_LE       0x2
#define _MM_CMPINT_UNUSED   0x3
#define _MM_CMPINT_NE       0x4
/* NLT and GE share one encoding, as do NLE and GT.  */
#define _MM_CMPINT_NLT      0x5
#define _MM_CMPINT_GE       0x5
#define _MM_CMPINT_NLE      0x6
#define _MM_CMPINT_GT       0x6
8971
8972#ifdef __OPTIMIZE__
8973extern __inline __mmask8
8974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8975_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
8976{
8977  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8978						 (__v8di) __Y, __P,
8979						 (__mmask8) -1);
8980}
8981
8982extern __inline __mmask16
8983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8984_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
8985{
8986  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8987						  (__v16si) __Y, __P,
8988						  (__mmask16) -1);
8989}
8990
8991extern __inline __mmask8
8992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8993_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
8994{
8995  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8996						  (__v8di) __Y, __P,
8997						  (__mmask8) -1);
8998}
8999
9000extern __inline __mmask16
9001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9002_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9003{
9004  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9005						   (__v16si) __Y, __P,
9006						   (__mmask16) -1);
9007}
9008
9009extern __inline __mmask8
9010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9011_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9012			  const int __R)
9013{
9014  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9015						  (__v8df) __Y, __P,
9016						  (__mmask8) -1, __R);
9017}
9018
9019extern __inline __mmask16
9020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9021_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9022{
9023  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9024						   (__v16sf) __Y, __P,
9025						   (__mmask16) -1, __R);
9026}
9027
9028extern __inline __mmask8
9029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9030_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9031			    const int __P)
9032{
9033  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9034						 (__v8di) __Y, __P,
9035						 (__mmask8) __U);
9036}
9037
9038extern __inline __mmask16
9039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9040_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9041			    const int __P)
9042{
9043  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9044						  (__v16si) __Y, __P,
9045						  (__mmask16) __U);
9046}
9047
9048extern __inline __mmask8
9049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9050_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9051			    const int __P)
9052{
9053  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9054						  (__v8di) __Y, __P,
9055						  (__mmask8) __U);
9056}
9057
9058extern __inline __mmask16
9059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9060_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9061			    const int __P)
9062{
9063  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9064						   (__v16si) __Y, __P,
9065						   (__mmask16) __U);
9066}
9067
9068extern __inline __mmask8
9069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9070_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9071			       const int __P, const int __R)
9072{
9073  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9074						  (__v8df) __Y, __P,
9075						  (__mmask8) __U, __R);
9076}
9077
9078extern __inline __mmask16
9079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9080_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9081			       const int __P, const int __R)
9082{
9083  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9084						   (__v16sf) __Y, __P,
9085						   (__mmask16) __U, __R);
9086}
9087
9088extern __inline __mmask8
9089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9090_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9091{
9092  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9093					       (__v2df) __Y, __P,
9094					       (__mmask8) -1, __R);
9095}
9096
9097extern __inline __mmask8
9098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9099_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9100			    const int __P, const int __R)
9101{
9102  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9103					       (__v2df) __Y, __P,
9104					       (__mmask8) __M, __R);
9105}
9106
9107extern __inline __mmask8
9108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9109_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9110{
9111  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9112					       (__v4sf) __Y, __P,
9113					       (__mmask8) -1, __R);
9114}
9115
9116extern __inline __mmask8
9117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9118_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9119			    const int __P, const int __R)
9120{
9121  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9122					       (__v4sf) __Y, __P,
9123					       (__mmask8) __M, __R);
9124}
9125
9126#else
/* Macro form used when __OPTIMIZE__ is not defined: cmpq512 builtin with
   predicate P and every mask bit set.  */
#define _mm512_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
					   (__v8di)(__m512i)(Y), \
					   (int)(P), (__mmask8)-1))
9131
/* Macro form used when __OPTIMIZE__ is not defined: cmpd512 builtin with
   predicate P and every mask bit set.  The result is cast to __mmask16,
   matching the inline definition; the earlier __mmask8 cast discarded the
   upper half of the 16-bit mask.  */
#define _mm512_cmp_epi32_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
					    (__v16si)(__m512i)(Y), \
					    (int)(P), (__mmask16)-1))
9136
/* Macro form used when __OPTIMIZE__ is not defined: ucmpq512 builtin with
   predicate P and every mask bit set.  */
#define _mm512_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
					    (__v8di)(__m512i)(Y), \
					    (int)(P), (__mmask8)-1))
9141
/* Macro form used when __OPTIMIZE__ is not defined: ucmpd512 builtin with
   predicate P and every mask bit set.  The result is cast to __mmask16,
   matching the inline definition; the earlier __mmask8 cast discarded the
   upper half of the 16-bit mask.  */
#define _mm512_cmp_epu32_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
					     (__v16si)(__m512i)(Y), \
					     (int)(P), (__mmask16)-1))
9146
/* Macro form used when __OPTIMIZE__ is not defined: cmppd512 builtin with
   predicate P, every mask bit set, and R as the trailing operand.  */
#define _mm512_cmp_round_pd_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
					    (__v8df)(__m512d)(Y), \
					    (int)(P), (__mmask8)-1, (R)))
9151
/* Macro form used when __OPTIMIZE__ is not defined: cmpps512 builtin with
   predicate P, every mask bit set, and R as the trailing operand.  */
#define _mm512_cmp_round_ps_mask(X, Y, P, R) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
					     (__v16sf)(__m512)(Y), \
					     (int)(P), (__mmask16)-1, (R)))
9156
/* Macro form used when __OPTIMIZE__ is not defined: cmpq512 builtin with
   predicate P and mask operand M.  M is parenthesized so the cast applies
   to the whole argument expression (e.g. `a | b`), not only its first
   operand.  */
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
					   (__v8di)(__m512i)(Y), \
					   (int)(P), (__mmask8)(M)))
9161
/* Macro form used when __OPTIMIZE__ is not defined: cmpd512 builtin with
   predicate P and mask operand M.  The result is cast to __mmask16,
   matching the inline definition (the earlier __mmask8 cast discarded the
   upper half of the mask), and M is parenthesized so its cast covers the
   whole argument expression.  */
#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
					    (__v16si)(__m512i)(Y), \
					    (int)(P), (__mmask16)(M)))
9166
/* Macro form used when __OPTIMIZE__ is not defined: ucmpq512 builtin with
   predicate P and mask operand M.  M is parenthesized so the cast applies
   to the whole argument expression.  */
#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
					    (__v8di)(__m512i)(Y), \
					    (int)(P), (__mmask8)(M)))
9171
/* Macro form used when __OPTIMIZE__ is not defined: ucmpd512 builtin with
   predicate P and mask operand M.  The result is cast to __mmask16,
   matching the inline definition (the earlier __mmask8 cast discarded the
   upper half of the mask), and M is parenthesized so its cast covers the
   whole argument expression.  */
#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
					     (__v16si)(__m512i)(Y), \
					     (int)(P), (__mmask16)(M)))
9176
/* Macro form used when __OPTIMIZE__ is not defined: cmppd512 builtin with
   predicate P, mask operand M and trailing operand R.  M is parenthesized
   so the cast applies to the whole argument expression.  */
#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
					    (__v8df)(__m512d)(Y), \
					    (int)(P), (__mmask8)(M), (R)))
9181
/* Macro form used when __OPTIMIZE__ is not defined: cmpps512 builtin with
   predicate P, mask operand M and trailing operand R.  M is parenthesized
   so the cast applies to the whole argument expression.  */
#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
					     (__v16sf)(__m512)(Y), \
					     (int)(P), (__mmask16)(M), (R)))
9186
/* Macro form used when __OPTIMIZE__ is not defined: cmpsd builtin with
   predicate P, every mask bit set, and R as the trailing operand.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
					 (__v2df)(__m128d)(Y), \
					 (int)(P), (__mmask8)-1, (R)))
9191
/* Macro form used when __OPTIMIZE__ is not defined: cmpsd builtin with
   predicate P, mask operand M (passed without a cast) and trailing
   operand R.  */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
					 (__v2df)(__m128d)(Y), \
					 (int)(P), (M), (R)))
9196
/* Macro form used when __OPTIMIZE__ is not defined: cmpss builtin with
   predicate P, every mask bit set, and R as the trailing operand.  */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
					 (__v4sf)(__m128)(Y), \
					 (int)(P), (__mmask8)-1, (R)))
9201
/* Macro form used when __OPTIMIZE__ is not defined: cmpss builtin with
   predicate P, mask operand M (passed without a cast) and trailing
   operand R.  */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
					 (__v4sf)(__m128)(Y), \
					 (int)(P), (M), (R)))
9206#endif
9207
9208#ifdef __OPTIMIZE__
9209extern __inline __m512
9210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9211_mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
9212{
9213  __m512 v1_old = _mm512_undefined_ps ();
9214  __mmask16 mask = 0xFFFF;
9215
9216  return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9217						__addr,
9218						(__v16si) __index,
9219						mask, __scale);
9220}
9221
9222extern __inline __m512
9223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9224_mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
9225			  __m512i __index, float const *__addr, int __scale)
9226{
9227  return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9228						__addr,
9229						(__v16si) __index,
9230						__mask, __scale);
9231}
9232
9233extern __inline __m512d
9234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9235_mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
9236{
9237  __m512d v1_old = _mm512_undefined_pd ();
9238  __mmask8 mask = 0xFF;
9239
9240  return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
9241						__addr,
9242						(__v8si) __index, mask,
9243						__scale);
9244}
9245
9246extern __inline __m512d
9247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9248_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9249			  __m256i __index, double const *__addr, int __scale)
9250{
9251  return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9252						__addr,
9253						(__v8si) __index,
9254						__mask, __scale);
9255}
9256
9257extern __inline __m256
9258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9259_mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
9260{
9261  __m256 v1_old = _mm256_undefined_ps ();
9262  __mmask8 mask = 0xFF;
9263
9264  return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
9265						__addr,
9266						(__v8di) __index, mask,
9267						__scale);
9268}
9269
9270extern __inline __m256
9271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9272_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9273			  __m512i __index, float const *__addr, int __scale)
9274{
9275  return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9276						__addr,
9277						(__v8di) __index,
9278						__mask, __scale);
9279}
9280
9281extern __inline __m512d
9282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9283_mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
9284{
9285  __m512d v1_old = _mm512_undefined_pd ();
9286  __mmask8 mask = 0xFF;
9287
9288  return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
9289						__addr,
9290						(__v8di) __index, mask,
9291						__scale);
9292}
9293
9294extern __inline __m512d
9295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9296_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9297			  __m512i __index, double const *__addr, int __scale)
9298{
9299  return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9300						__addr,
9301						(__v8di) __index,
9302						__mask, __scale);
9303}
9304
9305extern __inline __m512i
9306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9307_mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
9308{
9309  __m512i v1_old = _mm512_undefined_si512 ();
9310  __mmask16 mask = 0xFFFF;
9311
9312  return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
9313						 __addr,
9314						 (__v16si) __index,
9315						 mask, __scale);
9316}
9317
9318extern __inline __m512i
9319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9320_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9321			     __m512i __index, int const *__addr, int __scale)
9322{
9323  return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9324						 __addr,
9325						 (__v16si) __index,
9326						 __mask, __scale);
9327}
9328
9329extern __inline __m512i
9330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9331_mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
9332{
9333  __m512i v1_old = _mm512_undefined_si512 ();
9334  __mmask8 mask = 0xFF;
9335
9336  return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
9337						__addr,
9338						(__v8si) __index, mask,
9339						__scale);
9340}
9341
9342extern __inline __m512i
9343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9344_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9345			     __m256i __index, long long const *__addr,
9346			     int __scale)
9347{
9348  return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9349						__addr,
9350						(__v8si) __index,
9351						__mask, __scale);
9352}
9353
9354extern __inline __m256i
9355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9356_mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
9357{
9358  __m256i v1_old = _mm256_undefined_si256 ();
9359  __mmask8 mask = 0xFF;
9360
9361  return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
9362						 __addr,
9363						 (__v8di) __index,
9364						 mask, __scale);
9365}
9366
9367extern __inline __m256i
9368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9369_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9370			     __m512i __index, int const *__addr, int __scale)
9371{
9372  return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9373						 __addr,
9374						 (__v8di) __index,
9375						 __mask, __scale);
9376}
9377
9378extern __inline __m512i
9379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9380_mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
9381{
9382  __m512i v1_old = _mm512_undefined_si512 ();
9383  __mmask8 mask = 0xFF;
9384
9385  return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
9386						__addr,
9387						(__v8di) __index, mask,
9388						__scale);
9389}
9390
9391extern __inline __m512i
9392__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9393_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9394			     __m512i __index, long long const *__addr,
9395			     int __scale)
9396{
9397  return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9398						__addr,
9399						(__v8di) __index,
9400						__mask, __scale);
9401}
9402
9403extern __inline void
9404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9405_mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale)
9406{
9407  __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9408				 (__v16si) __index, (__v16sf) __v1, __scale);
9409}
9410
9411extern __inline void
9412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9413_mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
9414			   __m512i __index, __m512 __v1, int __scale)
9415{
9416  __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9417				 (__v16sf) __v1, __scale);
9418}
9419
9420extern __inline void
9421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9422_mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
9423		      int __scale)
9424{
9425  __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9426				(__v8si) __index, (__v8df) __v1, __scale);
9427}
9428
9429extern __inline void
9430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9431_mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
9432			   __m256i __index, __m512d __v1, int __scale)
9433{
9434  __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9435				(__v8df) __v1, __scale);
9436}
9437
9438extern __inline void
9439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9440_mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale)
9441{
9442  __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9443				 (__v8di) __index, (__v8sf) __v1, __scale);
9444}
9445
9446extern __inline void
9447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9448_mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
9449			   __m512i __index, __m256 __v1, int __scale)
9450{
9451  __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9452				 (__v8sf) __v1, __scale);
9453}
9454
9455extern __inline void
9456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9457_mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
9458		      int __scale)
9459{
9460  __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9461				(__v8di) __index, (__v8df) __v1, __scale);
9462}
9463
9464extern __inline void
9465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9466_mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
9467			   __m512i __index, __m512d __v1, int __scale)
9468{
9469  __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9470				(__v8df) __v1, __scale);
9471}
9472
9473extern __inline void
9474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9475_mm512_i32scatter_epi32 (int *__addr, __m512i __index,
9476			 __m512i __v1, int __scale)
9477{
9478  __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9479				 (__v16si) __index, (__v16si) __v1, __scale);
9480}
9481
9482extern __inline void
9483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9484_mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
9485			      __m512i __index, __m512i __v1, int __scale)
9486{
9487  __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9488				 (__v16si) __v1, __scale);
9489}
9490
9491extern __inline void
9492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9493_mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
9494			 __m512i __v1, int __scale)
9495{
9496  __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9497				(__v8si) __index, (__v8di) __v1, __scale);
9498}
9499
9500extern __inline void
9501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9502_mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
9503			      __m256i __index, __m512i __v1, int __scale)
9504{
9505  __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9506				(__v8di) __v1, __scale);
9507}
9508
9509extern __inline void
9510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9511_mm512_i64scatter_epi32 (int *__addr, __m512i __index,
9512			 __m256i __v1, int __scale)
9513{
9514  __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9515				 (__v8di) __index, (__v8si) __v1, __scale);
9516}
9517
9518extern __inline void
9519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9520_mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
9521			      __m512i __index, __m256i __v1, int __scale)
9522{
9523  __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9524				 (__v8si) __v1, __scale);
9525}
9526
9527extern __inline void
9528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9529_mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
9530			 __m512i __v1, int __scale)
9531{
9532  __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9533				(__v8di) __index, (__v8di) __v1, __scale);
9534}
9535
9536extern __inline void
9537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9538_mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
9539			      __m512i __index, __m512i __v1, int __scale)
9540{
9541  __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9542				(__v8di) __v1, __scale);
9543}
9544#else
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast and the expansion is wrapped in
   parentheses, so expressions such as `p + 1' are cast as a whole.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(), \
					  (float const *)(ADDR), \
					  (__v16si)(__m512i)(INDEX), \
					  (__mmask16)0xFFFF, (int)(SCALE)))
9550
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast and the expansion is wrapped in
   parentheses, so compound argument expressions are cast as a whole.  */
#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)(V1OLD), \
					  (float const *)(ADDR), \
					  (__v16si)(__m512i)(INDEX), \
					  (__mmask16)(MASK), (int)(SCALE)))
9556
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast and the expansion is wrapped in
   parentheses, so compound argument expressions are cast as a whole.  */
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \
					  (double const *)(ADDR), \
					  (__v8si)(__m256i)(INDEX), \
					  (__mmask8)0xFF, (int)(SCALE)))
9562
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast and the expansion is wrapped in
   parentheses, so compound argument expressions are cast as a whole.  */
#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)(V1OLD), \
					  (double const *)(ADDR), \
					  (__v8si)(__m256i)(INDEX), \
					  (__mmask8)(MASK), (int)(SCALE)))
9568
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast and the expansion is wrapped in
   parentheses, so compound argument expressions are cast as a whole.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \
					  (float const *)(ADDR), \
					  (__v8di)(__m512i)(INDEX), \
					  (__mmask8)0xFF, (int)(SCALE)))
9574
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast and the expansion is wrapped in
   parentheses, so compound argument expressions are cast as a whole.  */
#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)(V1OLD), \
					  (float const *)(ADDR), \
					  (__v8di)(__m512i)(INDEX), \
					  (__mmask8)(MASK), (int)(SCALE)))
9580
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast and the expansion is wrapped in
   parentheses, so compound argument expressions are cast as a whole.  */
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \
					  (double const *)(ADDR), \
					  (__v8di)(__m512i)(INDEX), \
					  (__mmask8)0xFF, (int)(SCALE)))
9586
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast and the expansion is wrapped in
   parentheses, so compound argument expressions are cast as a whole.  */
#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)(V1OLD), \
					  (double const *)(ADDR), \
					  (__v8di)(__m512i)(INDEX), \
					  (__mmask8)(MASK), (int)(SCALE)))
9592
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast and the expansion is wrapped in
   parentheses, so compound argument expressions are cast as a whole.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_si512 (), \
					   (int const *)(ADDR), \
					   (__v16si)(__m512i)(INDEX), \
					   (__mmask16)0xFFFF, (int)(SCALE)))
9598
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast and the expansion is wrapped in
   parentheses, so compound argument expressions are cast as a whole.  */
#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)(V1OLD), \
					   (int const *)(ADDR), \
					   (__v16si)(__m512i)(INDEX), \
					   (__mmask16)(MASK), (int)(SCALE)))
9604
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast and the expansion is wrapped in
   parentheses, so compound argument expressions are cast as a whole.  */
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_si512 (), \
					  (long long const *)(ADDR), \
					  (__v8si)(__m256i)(INDEX), \
					  (__mmask8)0xFF, (int)(SCALE)))
9610
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast and the expansion is wrapped in
   parentheses, so compound argument expressions are cast as a whole.  */
#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)(V1OLD), \
					  (long long const *)(ADDR), \
					  (__v8si)(__m256i)(INDEX), \
					  (__mmask8)(MASK), (int)(SCALE)))
9616
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast and the expansion is wrapped in
   parentheses, so compound argument expressions are cast as a whole.  */
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
					   (int const *)(ADDR), \
					   (__v8di)(__m512i)(INDEX), \
					   (__mmask8)0xFF, (int)(SCALE)))
9622
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast and the expansion is wrapped in
   parentheses, so compound argument expressions are cast as a whole.  */
#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)(V1OLD), \
					   (int const *)(ADDR), \
					   (__v8di)(__m512i)(INDEX), \
					   (__mmask8)(MASK), (int)(SCALE)))
9628
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast and the expansion is wrapped in
   parentheses, so compound argument expressions are cast as a whole.  */
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_si512 (), \
					  (long long const *)(ADDR), \
					  (__v8di)(__m512i)(INDEX), \
					  (__mmask8)0xFF, (int)(SCALE)))
9634
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast and the expansion is wrapped in
   parentheses, so compound argument expressions are cast as a whole.  */
#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)(V1OLD), \
					  (long long const *)(ADDR), \
					  (__v8di)(__m512i)(INDEX), \
					  (__mmask8)(MASK), (int)(SCALE)))
9640
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast so compound expressions such as `p + 1'
   are cast as a whole.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16sf ((float *)(ADDR), (__mmask16)0xFFFF, \
				 (__v16si)(__m512i)(INDEX), \
				 (__v16sf)(__m512)(V1), (int)(SCALE))
9645
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast so compound expressions are cast as a
   whole.  */
#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16sf ((float *)(ADDR), (__mmask16)(MASK), \
				 (__v16si)(__m512i)(INDEX), \
				 (__v16sf)(__m512)(V1), (int)(SCALE))
9650
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast so compound expressions are cast as a
   whole.  */
#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8df ((double *)(ADDR), (__mmask8)0xFF, \
				(__v8si)(__m256i)(INDEX), \
				(__v8df)(__m512d)(V1), (int)(SCALE))
9655
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast so compound expressions are cast as a
   whole.  */
#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8df ((double *)(ADDR), (__mmask8)(MASK), \
				(__v8si)(__m256i)(INDEX), \
				(__v8df)(__m512d)(V1), (int)(SCALE))
9660
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast so compound expressions are cast as a
   whole.  */
#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16sf ((float *)(ADDR), (__mmask8)0xFF, \
				 (__v8di)(__m512i)(INDEX), \
				 (__v8sf)(__m256)(V1), (int)(SCALE))
9665
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast so compound expressions are cast as a
   whole.  MASK is cast to __mmask8, the mask type the inline definition
   of this intrinsic declares (the macro previously said __mmask16).  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16sf ((float *)(ADDR), (__mmask8)(MASK), \
				 (__v8di)(__m512i)(INDEX), \
				 (__v8sf)(__m256)(V1), (int)(SCALE))
9670
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast so compound expressions are cast as a
   whole.  */
#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((double *)(ADDR), (__mmask8)0xFF, \
				(__v8di)(__m512i)(INDEX), \
				(__v8df)(__m512d)(V1), (int)(SCALE))
9675
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast so compound expressions are cast as a
   whole.  */
#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((double *)(ADDR), (__mmask8)(MASK), \
				(__v8di)(__m512i)(INDEX), \
				(__v8df)(__m512d)(V1), (int)(SCALE))
9680
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast so compound expressions are cast as a
   whole.  */
#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((int *)(ADDR), (__mmask16)0xFFFF, \
				 (__v16si)(__m512i)(INDEX), \
				 (__v16si)(__m512i)(V1), (int)(SCALE))
9685
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast so compound expressions are cast as a
   whole.  */
#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((int *)(ADDR), (__mmask16)(MASK), \
				 (__v16si)(__m512i)(INDEX), \
				 (__v16si)(__m512i)(V1), (int)(SCALE))
9690
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast so compound expressions are cast as a
   whole.  */
#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((long long *)(ADDR), (__mmask8)0xFF, \
				(__v8si)(__m256i)(INDEX), \
				(__v8di)(__m512i)(V1), (int)(SCALE))
9695
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast so compound expressions are cast as a
   whole.  */
#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((long long *)(ADDR), (__mmask8)(MASK), \
				(__v8si)(__m256i)(INDEX), \
				(__v8di)(__m512i)(V1), (int)(SCALE))
9700
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast so compound expressions are cast as a
   whole.  */
#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((int *)(ADDR), (__mmask8)0xFF, \
				 (__v8di)(__m512i)(INDEX), \
				 (__v8si)(__m256i)(V1), (int)(SCALE))
9705
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast so compound expressions are cast as a
   whole.  */
#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((int *)(ADDR), (__mmask8)(MASK), \
				 (__v8di)(__m512i)(INDEX), \
				 (__v8si)(__m256i)(V1), (int)(SCALE))
9710
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast so compound expressions are cast as a
   whole.  */
#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((long long *)(ADDR), (__mmask8)0xFF, \
				(__v8di)(__m512i)(INDEX), \
				(__v8di)(__m512i)(V1), (int)(SCALE))
9715
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before its cast so compound expressions are cast as a
   whole.  */
#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((long long *)(ADDR), (__mmask8)(MASK), \
				(__v8di)(__m512i)(INDEX), \
				(__v8di)(__m512i)(V1), (int)(SCALE))
9720#endif
9721
9722extern __inline __m512d
9723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9724_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9725{
9726  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9727						      (__v8df) __W,
9728						      (__mmask8) __U);
9729}
9730
9731extern __inline __m512d
9732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9733_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9734{
9735  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9736						      (__v8df)
9737						      _mm512_setzero_pd (),
9738						      (__mmask8) __U);
9739}
9740
9741extern __inline void
9742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9743_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9744{
9745  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9746					  (__mmask8) __U);
9747}
9748
9749extern __inline __m512
9750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9751_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9752{
9753  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9754						     (__v16sf) __W,
9755						     (__mmask16) __U);
9756}
9757
9758extern __inline __m512
9759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9760_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9761{
9762  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9763						     (__v16sf)
9764						     _mm512_setzero_ps (),
9765						     (__mmask16) __U);
9766}
9767
9768extern __inline void
9769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9770_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9771{
9772  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9773					  (__mmask16) __U);
9774}
9775
9776extern __inline __m512i
9777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9778_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9779{
9780  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9781						      (__v8di) __W,
9782						      (__mmask8) __U);
9783}
9784
9785extern __inline __m512i
9786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9787_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9788{
9789  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9790						      (__v8di)
9791						      _mm512_setzero_si512 (),
9792						      (__mmask8) __U);
9793}
9794
9795extern __inline void
9796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9797_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9798{
9799  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9800					  (__mmask8) __U);
9801}
9802
9803extern __inline __m512i
9804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9805_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9806{
9807  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9808						      (__v16si) __W,
9809						      (__mmask16) __U);
9810}
9811
9812extern __inline __m512i
9813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9814_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9815{
9816  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9817						      (__v16si)
9818						      _mm512_setzero_si512 (),
9819						      (__mmask16) __U);
9820}
9821
9822extern __inline void
9823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9824_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9825{
9826  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9827					  (__mmask16) __U);
9828}
9829
9830extern __inline __m512d
9831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9832_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9833{
9834  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9835						    (__v8df) __W,
9836						    (__mmask8) __U);
9837}
9838
9839extern __inline __m512d
9840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9841_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9842{
9843  return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
9844						     (__v8df)
9845						     _mm512_setzero_pd (),
9846						     (__mmask8) __U);
9847}
9848
9849extern __inline __m512d
9850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9851_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9852{
9853  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
9854							(__v8df) __W,
9855							(__mmask8) __U);
9856}
9857
9858extern __inline __m512d
9859__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9860_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
9861{
9862  return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
9863							 (__v8df)
9864							 _mm512_setzero_pd (),
9865							 (__mmask8) __U);
9866}
9867
9868extern __inline __m512
9869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9870_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9871{
9872  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9873						   (__v16sf) __W,
9874						   (__mmask16) __U);
9875}
9876
9877extern __inline __m512
9878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9879_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9880{
9881  return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
9882						    (__v16sf)
9883						    _mm512_setzero_ps (),
9884						    (__mmask16) __U);
9885}
9886
9887extern __inline __m512
9888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9889_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9890{
9891  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
9892						       (__v16sf) __W,
9893						       (__mmask16) __U);
9894}
9895
9896extern __inline __m512
9897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9898_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
9899{
9900  return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
9901							(__v16sf)
9902							_mm512_setzero_ps (),
9903							(__mmask16) __U);
9904}
9905
9906extern __inline __m512i
9907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9908_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9909{
9910  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9911						    (__v8di) __W,
9912						    (__mmask8) __U);
9913}
9914
9915extern __inline __m512i
9916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9917_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
9918{
9919  return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
9920						     (__v8di)
9921						     _mm512_setzero_si512 (),
9922						     (__mmask8) __U);
9923}
9924
9925extern __inline __m512i
9926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9927_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9928{
9929  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
9930							(__v8di) __W,
9931							(__mmask8) __U);
9932}
9933
9934extern __inline __m512i
9935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9936_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
9937{
9938  return (__m512i)
9939	 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
9940					       (__v8di)
9941					       _mm512_setzero_si512 (),
9942					       (__mmask8) __U);
9943}
9944
9945extern __inline __m512i
9946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9947_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9948{
9949  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9950						    (__v16si) __W,
9951						    (__mmask16) __U);
9952}
9953
9954extern __inline __m512i
9955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9956_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9957{
9958  return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
9959						     (__v16si)
9960						     _mm512_setzero_si512 (),
9961						     (__mmask16) __U);
9962}
9963
9964extern __inline __m512i
9965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9966_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
9967{
9968  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
9969							(__v16si) __W,
9970							(__mmask16) __U);
9971}
9972
9973extern __inline __m512i
9974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9975_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
9976{
9977  return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
9978							 (__v16si)
9979							 _mm512_setzero_si512
9980							 (), (__mmask16) __U);
9981}
9982
9983/* Mask arithmetic operations */
9984extern __inline __mmask16
9985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9986_mm512_kand (__mmask16 __A, __mmask16 __B)
9987{
9988  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
9989}
9990
9991extern __inline __mmask16
9992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9993_mm512_kandn (__mmask16 __A, __mmask16 __B)
9994{
9995  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
9996}
9997
9998extern __inline __mmask16
9999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10000_mm512_kor (__mmask16 __A, __mmask16 __B)
10001{
10002  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10003}
10004
10005extern __inline int
10006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10007_mm512_kortestz (__mmask16 __A, __mmask16 __B)
10008{
10009  return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10010						(__mmask16) __B);
10011}
10012
10013extern __inline int
10014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10015_mm512_kortestc (__mmask16 __A, __mmask16 __B)
10016{
10017  return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10018						(__mmask16) __B);
10019}
10020
10021extern __inline __mmask16
10022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10023_mm512_kxnor (__mmask16 __A, __mmask16 __B)
10024{
10025  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10026}
10027
10028extern __inline __mmask16
10029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10030_mm512_kxor (__mmask16 __A, __mmask16 __B)
10031{
10032  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10033}
10034
10035extern __inline __mmask16
10036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10037_mm512_knot (__mmask16 __A)
10038{
10039  return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10040}
10041
10042extern __inline __mmask16
10043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10044_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10045{
10046  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10047}
10048
10049#ifdef __OPTIMIZE__
10050extern __inline __m512i
10051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10052_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10053			  const int __imm)
10054{
10055  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10056						    (__v4si) __D,
10057						    __imm,
10058						    (__v16si)
10059						    _mm512_setzero_si512 (),
10060						    __B);
10061}
10062
10063extern __inline __m512
10064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10065_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10066			  const int __imm)
10067{
10068  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10069						   (__v4sf) __D,
10070						   __imm,
10071						   (__v16sf)
10072						   _mm512_setzero_ps (), __B);
10073}
10074
10075extern __inline __m512i
10076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10077_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10078			 __m128i __D, const int __imm)
10079{
10080  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10081						    (__v4si) __D,
10082						    __imm,
10083						    (__v16si) __A,
10084						    __B);
10085}
10086
10087extern __inline __m512
10088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10089_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10090			 __m128 __D, const int __imm)
10091{
10092  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10093						   (__v4sf) __D,
10094						   __imm,
10095						   (__v16sf) __A, __B);
10096}
10097#else
/* Macro form of _mm512_maskz_insertf32x4 for non-optimizing builds.
   A is the write mask; it is cast to __mmask16 (one bit per float
   element) to match the inline definition, so that the upper eight
   mask bits are not discarded.  */
#define _mm512_maskz_insertf32x4(A, X, Y, C)                            \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(),      \
    (__mmask16)(A)))
10102
/* Macro form of _mm512_maskz_inserti32x4 for non-optimizing builds.
   A is the write mask; it is cast to __mmask16 (one bit per 32-bit
   element) to match the inline definition, so that the upper eight
   mask bits are not discarded.  */
#define _mm512_maskz_inserti32x4(A, X, Y, C)                            \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (),     \
    (__mmask16)(A)))
10107
/* Macro form of _mm512_mask_insertf32x4 for non-optimizing builds.
   A is the fallback vector and B the write mask; B is cast to
   __mmask16 (one bit per float element) to match the inline
   definition, so that the upper eight mask bits are not discarded.  */
#define _mm512_mask_insertf32x4(A, B, X, Y, C)                          \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A),             \
					     (__mmask16)(B)))
10112
/* Macro form of _mm512_mask_inserti32x4 for non-optimizing builds.
   A is the fallback vector and B the write mask; B is cast to
   __mmask16 (one bit per 32-bit element) to match the inline
   definition, so that the upper eight mask bits are not discarded.  */
#define _mm512_mask_inserti32x4(A, B, X, Y, C)                          \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A),           \
					      (__mmask16)(B)))
10117#endif
10118
10119extern __inline __m512i
10120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10121_mm512_max_epi64 (__m512i __A, __m512i __B)
10122{
10123  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10124						  (__v8di) __B,
10125						  (__v8di)
10126						  _mm512_undefined_si512 (),
10127						  (__mmask8) -1);
10128}
10129
10130extern __inline __m512i
10131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10132_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10133{
10134  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10135						  (__v8di) __B,
10136						  (__v8di)
10137						  _mm512_setzero_si512 (),
10138						  __M);
10139}
10140
10141extern __inline __m512i
10142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10143_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10144{
10145  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10146						  (__v8di) __B,
10147						  (__v8di) __W, __M);
10148}
10149
10150extern __inline __m512i
10151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10152_mm512_min_epi64 (__m512i __A, __m512i __B)
10153{
10154  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10155						  (__v8di) __B,
10156						  (__v8di)
10157						  _mm512_undefined_si512 (),
10158						  (__mmask8) -1);
10159}
10160
10161extern __inline __m512i
10162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10163_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10164{
10165  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10166						  (__v8di) __B,
10167						  (__v8di) __W, __M);
10168}
10169
10170extern __inline __m512i
10171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10172_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10173{
10174  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10175						  (__v8di) __B,
10176						  (__v8di)
10177						  _mm512_setzero_si512 (),
10178						  __M);
10179}
10180
10181extern __inline __m512i
10182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10183_mm512_max_epu64 (__m512i __A, __m512i __B)
10184{
10185  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10186						  (__v8di) __B,
10187						  (__v8di)
10188						  _mm512_undefined_si512 (),
10189						  (__mmask8) -1);
10190}
10191
10192extern __inline __m512i
10193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10194_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10195{
10196  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10197						  (__v8di) __B,
10198						  (__v8di)
10199						  _mm512_setzero_si512 (),
10200						  __M);
10201}
10202
10203extern __inline __m512i
10204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10205_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10206{
10207  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10208						  (__v8di) __B,
10209						  (__v8di) __W, __M);
10210}
10211
10212extern __inline __m512i
10213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10214_mm512_min_epu64 (__m512i __A, __m512i __B)
10215{
10216  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10217						  (__v8di) __B,
10218						  (__v8di)
10219						  _mm512_undefined_si512 (),
10220						  (__mmask8) -1);
10221}
10222
10223extern __inline __m512i
10224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10225_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10226{
10227  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10228						  (__v8di) __B,
10229						  (__v8di) __W, __M);
10230}
10231
10232extern __inline __m512i
10233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10234_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10235{
10236  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10237						  (__v8di) __B,
10238						  (__v8di)
10239						  _mm512_setzero_si512 (),
10240						  __M);
10241}
10242
10243extern __inline __m512i
10244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10245_mm512_max_epi32 (__m512i __A, __m512i __B)
10246{
10247  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10248						  (__v16si) __B,
10249						  (__v16si)
10250						  _mm512_undefined_si512 (),
10251						  (__mmask16) -1);
10252}
10253
10254extern __inline __m512i
10255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10256_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10257{
10258  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10259						  (__v16si) __B,
10260						  (__v16si)
10261						  _mm512_setzero_si512 (),
10262						  __M);
10263}
10264
10265extern __inline __m512i
10266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10267_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10268{
10269  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10270						  (__v16si) __B,
10271						  (__v16si) __W, __M);
10272}
10273
10274extern __inline __m512i
10275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10276_mm512_min_epi32 (__m512i __A, __m512i __B)
10277{
10278  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10279						  (__v16si) __B,
10280						  (__v16si)
10281						  _mm512_undefined_si512 (),
10282						  (__mmask16) -1);
10283}
10284
10285extern __inline __m512i
10286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10287_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10288{
10289  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10290						  (__v16si) __B,
10291						  (__v16si)
10292						  _mm512_setzero_si512 (),
10293						  __M);
10294}
10295
10296extern __inline __m512i
10297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10298_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10299{
10300  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10301						  (__v16si) __B,
10302						  (__v16si) __W, __M);
10303}
10304
10305extern __inline __m512i
10306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10307_mm512_max_epu32 (__m512i __A, __m512i __B)
10308{
10309  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10310						  (__v16si) __B,
10311						  (__v16si)
10312						  _mm512_undefined_si512 (),
10313						  (__mmask16) -1);
10314}
10315
10316extern __inline __m512i
10317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10318_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10319{
10320  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10321						  (__v16si) __B,
10322						  (__v16si)
10323						  _mm512_setzero_si512 (),
10324						  __M);
10325}
10326
10327extern __inline __m512i
10328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10329_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10330{
10331  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10332						  (__v16si) __B,
10333						  (__v16si) __W, __M);
10334}
10335
10336extern __inline __m512i
10337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10338_mm512_min_epu32 (__m512i __A, __m512i __B)
10339{
10340  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10341						  (__v16si) __B,
10342						  (__v16si)
10343						  _mm512_undefined_si512 (),
10344						  (__mmask16) -1);
10345}
10346
10347extern __inline __m512i
10348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10349_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10350{
10351  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10352						  (__v16si) __B,
10353						  (__v16si)
10354						  _mm512_setzero_si512 (),
10355						  __M);
10356}
10357
10358extern __inline __m512i
10359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10360_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10361{
10362  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10363						  (__v16si) __B,
10364						  (__v16si) __W, __M);
10365}
10366
10367extern __inline __m512
10368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10369_mm512_unpacklo_ps (__m512 __A, __m512 __B)
10370{
10371  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10372						   (__v16sf) __B,
10373						   (__v16sf)
10374						   _mm512_undefined_ps (),
10375						   (__mmask16) -1);
10376}
10377
10378extern __inline __m512
10379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10380_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10381{
10382  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10383						   (__v16sf) __B,
10384						   (__v16sf) __W,
10385						   (__mmask16) __U);
10386}
10387
10388extern __inline __m512
10389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10390_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10391{
10392  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10393						   (__v16sf) __B,
10394						   (__v16sf)
10395						   _mm512_setzero_ps (),
10396						   (__mmask16) __U);
10397}
10398
10399#ifdef __OPTIMIZE__
10400extern __inline __m128d
10401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10402_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10403{
10404  return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
10405					       (__v2df) __B,
10406					       __R);
10407}
10408
10409extern __inline __m128
10410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10411_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10412{
10413  return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
10414					      (__v4sf) __B,
10415					      __R);
10416}
10417
10418extern __inline __m128d
10419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10420_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10421{
10422  return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
10423					       (__v2df) __B,
10424					       __R);
10425}
10426
10427extern __inline __m128
10428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10429_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10430{
10431  return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
10432					      (__v4sf) __B,
10433					      __R);
10434}
10435
10436#else
/* Macro form of _mm_max_round_sd for non-optimizing builds.  It must
   expand to the maxsd builtin, as the inline definition does (the
   previous expansion called the addsd builtin and so computed a sum).  */
#define _mm_max_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_maxsd_round ((A), (B), (C)))
10439
/* Macro form of _mm_max_round_ss for non-optimizing builds.  It must
   expand to the maxss builtin, as the inline definition does (the
   previous expansion called the addss builtin and so computed a sum).  */
#define _mm_max_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_maxss_round ((A), (B), (C)))
10442
/* Macro form of _mm_min_round_sd for non-optimizing builds.  It must
   expand to the minsd builtin, as the inline definition does (the
   previous expansion called the subsd builtin and so computed a
   difference).  */
#define _mm_min_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_minsd_round ((A), (B), (C)))
10445
/* Macro form of _mm_min_round_ss for non-optimizing builds.  It must
   expand to the minss builtin, as the inline definition does (the
   previous expansion called the subss builtin and so computed a
   difference).  */
#define _mm_min_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_minss_round ((A), (B), (C)))
10448#endif
10449
10450extern __inline __m512d
10451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10452_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
10453{
10454  return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
10455						     (__v8df) __W,
10456						     (__mmask8) __U);
10457}
10458
10459extern __inline __m512
10460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10461_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
10462{
10463  return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
10464						    (__v16sf) __W,
10465						    (__mmask16) __U);
10466}
10467
10468extern __inline __m512i
10469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10470_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
10471{
10472  return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
10473						    (__v8di) __W,
10474						    (__mmask8) __U);
10475}
10476
10477extern __inline __m512i
10478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10479_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
10480{
10481  return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
10482						    (__v16si) __W,
10483						    (__mmask16) __U);
10484}
10485
10486#ifdef __OPTIMIZE__
10487extern __inline __m128d
10488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10489_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10490{
10491  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10492						   (__v2df) __A,
10493						   (__v2df) __B,
10494						   __R);
10495}
10496
10497extern __inline __m128
10498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10499_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10500{
10501  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10502						  (__v4sf) __A,
10503						  (__v4sf) __B,
10504						  __R);
10505}
10506
10507extern __inline __m128d
10508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10509_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10510{
10511  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10512						   (__v2df) __A,
10513						   -(__v2df) __B,
10514						   __R);
10515}
10516
10517extern __inline __m128
10518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10519_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10520{
10521  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10522						  (__v4sf) __A,
10523						  -(__v4sf) __B,
10524						  __R);
10525}
10526
10527extern __inline __m128d
10528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10529_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10530{
10531  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10532						   -(__v2df) __A,
10533						   (__v2df) __B,
10534						   __R);
10535}
10536
10537extern __inline __m128
10538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10539_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10540{
10541  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10542						  -(__v4sf) __A,
10543						  (__v4sf) __B,
10544						  __R);
10545}
10546
10547extern __inline __m128d
10548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10549_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10550{
10551  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10552						   -(__v2df) __A,
10553						   -(__v2df) __B,
10554						   __R);
10555}
10556
10557extern __inline __m128
10558__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10559_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10560{
10561  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10562						  -(__v4sf) __A,
10563						  -(__v4sf) __B,
10564						  __R);
10565}
10566#else
/* Macro form of _mm_fmadd_round_sd for non-optimizing builds.  The
   expansion is fully parenthesized so that it behaves as a single
   primary expression, like a call to the inline definition.  */
#define _mm_fmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round ((A), (B), (C), (R)))
10569
/* Macro form of _mm_fmadd_round_ss for non-optimizing builds.  The
   expansion is fully parenthesized so that it behaves as a single
   primary expression, like a call to the inline definition.  */
#define _mm_fmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round ((A), (B), (C), (R)))
10572
/* Macro form of _mm_fmsub_round_sd for non-optimizing builds: the
   third operand is negated before the vfmaddsd3 builtin is called.
   The expansion is fully parenthesized so that it behaves as a single
   primary expression, like a call to the inline definition.  */
#define _mm_fmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round ((A), (B), -(C), (R)))
10575
/* Macro form of _mm_fmsub_round_ss for non-optimizing builds: the
   third operand is negated before the vfmaddss3 builtin is called.
   The expansion is fully parenthesized so that it behaves as a single
   primary expression, like a call to the inline definition.  */
#define _mm_fmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round ((A), (B), -(C), (R)))
10578
/* Macro form of _mm_fnmadd_round_sd for non-optimizing builds: the
   second operand is negated before the vfmaddsd3 builtin is called.
   The expansion is fully parenthesized so that it behaves as a single
   primary expression, like a call to the inline definition.  */
#define _mm_fnmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round ((A), -(B), (C), (R)))
10581
/* Macro form of _mm_fnmadd_round_ss for non-optimizing builds: the
   second operand is negated before the vfmaddss3 builtin is called.
   The expansion is fully parenthesized so that it behaves as a single
   primary expression, like a call to the inline definition.  */
#define _mm_fnmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round ((A), -(B), (C), (R)))
10584
/* Macro form of _mm_fnmsub_round_sd for non-optimizing builds: the
   second and third operands are negated before the vfmaddsd3 builtin
   is called.  The expansion is fully parenthesized so that it behaves
   as a single primary expression, like a call to the inline
   definition.  */
#define _mm_fnmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round ((A), -(B), -(C), (R)))
10587
/* Macro form of _mm_fnmsub_round_ss for non-optimizing builds: the
   second and third operands are negated before the vfmaddss3 builtin
   is called.  The expansion is fully parenthesized so that it behaves
   as a single primary expression, like a call to the inline
   definition.  */
#define _mm_fnmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round ((A), -(B), -(C), (R)))
10590#endif
10591
10592#ifdef __OPTIMIZE__
10593extern __inline int
10594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10595_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
10596{
10597  return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
10598}
10599
10600extern __inline int
10601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10602_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
10603{
10604  return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
10605}
10606#else
/* Macro form of _mm_comi_round_ss for non-optimizing builds: a direct
   call to the vcomiss builtin with the four arguments in order.  */
#define _mm_comi_round_ss(A, B, C, D) \
  (__builtin_ia32_vcomiss ((A), (B), (C), (D)))
/* Macro form of _mm_comi_round_sd for non-optimizing builds: a direct
   call to the vcomisd builtin with the four arguments in order.  */
#define _mm_comi_round_sd(A, B, C, D) \
  (__builtin_ia32_vcomisd ((A), (B), (C), (D)))
10611#endif
10612
10613extern __inline __m512d
10614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10615_mm512_sqrt_pd (__m512d __A)
10616{
10617  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10618						  (__v8df)
10619						  _mm512_undefined_pd (),
10620						  (__mmask8) -1,
10621						  _MM_FROUND_CUR_DIRECTION);
10622}
10623
10624extern __inline __m512d
10625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10626_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
10627{
10628  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10629						  (__v8df) __W,
10630						  (__mmask8) __U,
10631						  _MM_FROUND_CUR_DIRECTION);
10632}
10633
10634extern __inline __m512d
10635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10636_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
10637{
10638  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10639						  (__v8df)
10640						  _mm512_setzero_pd (),
10641						  (__mmask8) __U,
10642						  _MM_FROUND_CUR_DIRECTION);
10643}
10644
10645extern __inline __m512
10646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10647_mm512_sqrt_ps (__m512 __A)
10648{
10649  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10650						 (__v16sf)
10651						 _mm512_undefined_ps (),
10652						 (__mmask16) -1,
10653						 _MM_FROUND_CUR_DIRECTION);
10654}
10655
10656extern __inline __m512
10657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10658_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
10659{
10660  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10661						 (__v16sf) __W,
10662						 (__mmask16) __U,
10663						 _MM_FROUND_CUR_DIRECTION);
10664}
10665
10666extern __inline __m512
10667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10668_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
10669{
10670  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10671						 (__v16sf)
10672						 _mm512_setzero_ps (),
10673						 (__mmask16) __U,
10674						 _MM_FROUND_CUR_DIRECTION);
10675}
10676
10677extern __inline __m512d
10678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10679_mm512_add_pd (__m512d __A, __m512d __B)
10680{
10681  return (__m512d) ((__v8df)__A + (__v8df)__B);
10682}
10683
10684extern __inline __m512d
10685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10686_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10687{
10688  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10689						 (__v8df) __B,
10690						 (__v8df) __W,
10691						 (__mmask8) __U,
10692						 _MM_FROUND_CUR_DIRECTION);
10693}
10694
10695extern __inline __m512d
10696__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10697_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
10698{
10699  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10700						 (__v8df) __B,
10701						 (__v8df)
10702						 _mm512_setzero_pd (),
10703						 (__mmask8) __U,
10704						 _MM_FROUND_CUR_DIRECTION);
10705}
10706
10707extern __inline __m512
10708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10709_mm512_add_ps (__m512 __A, __m512 __B)
10710{
10711  return (__m512) ((__v16sf)__A + (__v16sf)__B);
10712}
10713
10714extern __inline __m512
10715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10716_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10717{
10718  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10719						(__v16sf) __B,
10720						(__v16sf) __W,
10721						(__mmask16) __U,
10722						_MM_FROUND_CUR_DIRECTION);
10723}
10724
10725extern __inline __m512
10726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10727_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
10728{
10729  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10730						(__v16sf) __B,
10731						(__v16sf)
10732						_mm512_setzero_ps (),
10733						(__mmask16) __U,
10734						_MM_FROUND_CUR_DIRECTION);
10735}
10736
10737extern __inline __m512d
10738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10739_mm512_sub_pd (__m512d __A, __m512d __B)
10740{
10741  return (__m512d) ((__v8df)__A - (__v8df)__B);
10742}
10743
10744extern __inline __m512d
10745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10746_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10747{
10748  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10749						 (__v8df) __B,
10750						 (__v8df) __W,
10751						 (__mmask8) __U,
10752						 _MM_FROUND_CUR_DIRECTION);
10753}
10754
10755extern __inline __m512d
10756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10757_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
10758{
10759  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10760						 (__v8df) __B,
10761						 (__v8df)
10762						 _mm512_setzero_pd (),
10763						 (__mmask8) __U,
10764						 _MM_FROUND_CUR_DIRECTION);
10765}
10766
10767extern __inline __m512
10768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10769_mm512_sub_ps (__m512 __A, __m512 __B)
10770{
10771  return (__m512) ((__v16sf)__A - (__v16sf)__B);
10772}
10773
10774extern __inline __m512
10775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10776_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10777{
10778  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10779						(__v16sf) __B,
10780						(__v16sf) __W,
10781						(__mmask16) __U,
10782						_MM_FROUND_CUR_DIRECTION);
10783}
10784
10785extern __inline __m512
10786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10787_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
10788{
10789  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10790						(__v16sf) __B,
10791						(__v16sf)
10792						_mm512_setzero_ps (),
10793						(__mmask16) __U,
10794						_MM_FROUND_CUR_DIRECTION);
10795}
10796
10797extern __inline __m512d
10798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10799_mm512_mul_pd (__m512d __A, __m512d __B)
10800{
10801  return (__m512d) ((__v8df)__A * (__v8df)__B);
10802}
10803
10804extern __inline __m512d
10805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10806_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10807{
10808  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10809						 (__v8df) __B,
10810						 (__v8df) __W,
10811						 (__mmask8) __U,
10812						 _MM_FROUND_CUR_DIRECTION);
10813}
10814
10815extern __inline __m512d
10816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10817_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
10818{
10819  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10820						 (__v8df) __B,
10821						 (__v8df)
10822						 _mm512_setzero_pd (),
10823						 (__mmask8) __U,
10824						 _MM_FROUND_CUR_DIRECTION);
10825}
10826
10827extern __inline __m512
10828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10829_mm512_mul_ps (__m512 __A, __m512 __B)
10830{
10831  return (__m512) ((__v16sf)__A * (__v16sf)__B);
10832}
10833
10834extern __inline __m512
10835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10836_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10837{
10838  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10839						(__v16sf) __B,
10840						(__v16sf) __W,
10841						(__mmask16) __U,
10842						_MM_FROUND_CUR_DIRECTION);
10843}
10844
10845extern __inline __m512
10846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10847_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
10848{
10849  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10850						(__v16sf) __B,
10851						(__v16sf)
10852						_mm512_setzero_ps (),
10853						(__mmask16) __U,
10854						_MM_FROUND_CUR_DIRECTION);
10855}
10856
10857extern __inline __m512d
10858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10859_mm512_div_pd (__m512d __M, __m512d __V)
10860{
10861  return (__m512d) ((__v8df)__M / (__v8df)__V);
10862}
10863
10864extern __inline __m512d
10865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10866_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
10867{
10868  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10869						 (__v8df) __V,
10870						 (__v8df) __W,
10871						 (__mmask8) __U,
10872						 _MM_FROUND_CUR_DIRECTION);
10873}
10874
10875extern __inline __m512d
10876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10877_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
10878{
10879  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10880						 (__v8df) __V,
10881						 (__v8df)
10882						 _mm512_setzero_pd (),
10883						 (__mmask8) __U,
10884						 _MM_FROUND_CUR_DIRECTION);
10885}
10886
10887extern __inline __m512
10888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10889_mm512_div_ps (__m512 __A, __m512 __B)
10890{
10891  return (__m512) ((__v16sf)__A / (__v16sf)__B);
10892}
10893
10894extern __inline __m512
10895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10896_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10897{
10898  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10899						(__v16sf) __B,
10900						(__v16sf) __W,
10901						(__mmask16) __U,
10902						_MM_FROUND_CUR_DIRECTION);
10903}
10904
10905extern __inline __m512
10906__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10907_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
10908{
10909  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10910						(__v16sf) __B,
10911						(__v16sf)
10912						_mm512_setzero_ps (),
10913						(__mmask16) __U,
10914						_MM_FROUND_CUR_DIRECTION);
10915}
10916
10917extern __inline __m512d
10918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10919_mm512_max_pd (__m512d __A, __m512d __B)
10920{
10921  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10922						 (__v8df) __B,
10923						 (__v8df)
10924						 _mm512_undefined_pd (),
10925						 (__mmask8) -1,
10926						 _MM_FROUND_CUR_DIRECTION);
10927}
10928
10929extern __inline __m512d
10930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10931_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10932{
10933  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10934						 (__v8df) __B,
10935						 (__v8df) __W,
10936						 (__mmask8) __U,
10937						 _MM_FROUND_CUR_DIRECTION);
10938}
10939
10940extern __inline __m512d
10941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10942_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
10943{
10944  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10945						 (__v8df) __B,
10946						 (__v8df)
10947						 _mm512_setzero_pd (),
10948						 (__mmask8) __U,
10949						 _MM_FROUND_CUR_DIRECTION);
10950}
10951
10952extern __inline __m512
10953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10954_mm512_max_ps (__m512 __A, __m512 __B)
10955{
10956  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10957						(__v16sf) __B,
10958						(__v16sf)
10959						_mm512_undefined_ps (),
10960						(__mmask16) -1,
10961						_MM_FROUND_CUR_DIRECTION);
10962}
10963
10964extern __inline __m512
10965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10966_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10967{
10968  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10969						(__v16sf) __B,
10970						(__v16sf) __W,
10971						(__mmask16) __U,
10972						_MM_FROUND_CUR_DIRECTION);
10973}
10974
10975extern __inline __m512
10976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10977_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
10978{
10979  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10980						(__v16sf) __B,
10981						(__v16sf)
10982						_mm512_setzero_ps (),
10983						(__mmask16) __U,
10984						_MM_FROUND_CUR_DIRECTION);
10985}
10986
10987extern __inline __m512d
10988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10989_mm512_min_pd (__m512d __A, __m512d __B)
10990{
10991  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
10992						 (__v8df) __B,
10993						 (__v8df)
10994						 _mm512_undefined_pd (),
10995						 (__mmask8) -1,
10996						 _MM_FROUND_CUR_DIRECTION);
10997}
10998
10999extern __inline __m512d
11000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11001_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11002{
11003  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11004						 (__v8df) __B,
11005						 (__v8df) __W,
11006						 (__mmask8) __U,
11007						 _MM_FROUND_CUR_DIRECTION);
11008}
11009
11010extern __inline __m512d
11011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11012_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11013{
11014  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11015						 (__v8df) __B,
11016						 (__v8df)
11017						 _mm512_setzero_pd (),
11018						 (__mmask8) __U,
11019						 _MM_FROUND_CUR_DIRECTION);
11020}
11021
11022extern __inline __m512
11023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11024_mm512_min_ps (__m512 __A, __m512 __B)
11025{
11026  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11027						(__v16sf) __B,
11028						(__v16sf)
11029						_mm512_undefined_ps (),
11030						(__mmask16) -1,
11031						_MM_FROUND_CUR_DIRECTION);
11032}
11033
11034extern __inline __m512
11035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11036_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11037{
11038  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11039						(__v16sf) __B,
11040						(__v16sf) __W,
11041						(__mmask16) __U,
11042						_MM_FROUND_CUR_DIRECTION);
11043}
11044
11045extern __inline __m512
11046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11047_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11048{
11049  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11050						(__v16sf) __B,
11051						(__v16sf)
11052						_mm512_setzero_ps (),
11053						(__mmask16) __U,
11054						_MM_FROUND_CUR_DIRECTION);
11055}
11056
11057extern __inline __m512d
11058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11059_mm512_scalef_pd (__m512d __A, __m512d __B)
11060{
11061  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11062						    (__v8df) __B,
11063						    (__v8df)
11064						    _mm512_undefined_pd (),
11065						    (__mmask8) -1,
11066						    _MM_FROUND_CUR_DIRECTION);
11067}
11068
11069extern __inline __m512d
11070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11071_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11072{
11073  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11074						    (__v8df) __B,
11075						    (__v8df) __W,
11076						    (__mmask8) __U,
11077						    _MM_FROUND_CUR_DIRECTION);
11078}
11079
11080extern __inline __m512d
11081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11082_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
11083{
11084  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11085						    (__v8df) __B,
11086						    (__v8df)
11087						    _mm512_setzero_pd (),
11088						    (__mmask8) __U,
11089						    _MM_FROUND_CUR_DIRECTION);
11090}
11091
11092extern __inline __m512
11093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11094_mm512_scalef_ps (__m512 __A, __m512 __B)
11095{
11096  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11097						   (__v16sf) __B,
11098						   (__v16sf)
11099						   _mm512_undefined_ps (),
11100						   (__mmask16) -1,
11101						   _MM_FROUND_CUR_DIRECTION);
11102}
11103
11104extern __inline __m512
11105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11106_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11107{
11108  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11109						   (__v16sf) __B,
11110						   (__v16sf) __W,
11111						   (__mmask16) __U,
11112						   _MM_FROUND_CUR_DIRECTION);
11113}
11114
11115extern __inline __m512
11116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11117_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
11118{
11119  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11120						   (__v16sf) __B,
11121						   (__v16sf)
11122						   _mm512_setzero_ps (),
11123						   (__mmask16) __U,
11124						   _MM_FROUND_CUR_DIRECTION);
11125}
11126
11127extern __inline __m128d
11128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11129_mm_scalef_sd (__m128d __A, __m128d __B)
11130{
11131  return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
11132						  (__v2df) __B,
11133						  _MM_FROUND_CUR_DIRECTION);
11134}
11135
11136extern __inline __m128
11137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11138_mm_scalef_ss (__m128 __A, __m128 __B)
11139{
11140  return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
11141						 (__v4sf) __B,
11142						 _MM_FROUND_CUR_DIRECTION);
11143}
11144
11145extern __inline __m512d
11146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11147_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11148{
11149  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11150						    (__v8df) __B,
11151						    (__v8df) __C,
11152						    (__mmask8) -1,
11153						    _MM_FROUND_CUR_DIRECTION);
11154}
11155
11156extern __inline __m512d
11157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11158_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11159{
11160  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11161						    (__v8df) __B,
11162						    (__v8df) __C,
11163						    (__mmask8) __U,
11164						    _MM_FROUND_CUR_DIRECTION);
11165}
11166
11167extern __inline __m512d
11168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11169_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11170{
11171  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
11172						     (__v8df) __B,
11173						     (__v8df) __C,
11174						     (__mmask8) __U,
11175						     _MM_FROUND_CUR_DIRECTION);
11176}
11177
11178extern __inline __m512d
11179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11180_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11181{
11182  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11183						     (__v8df) __B,
11184						     (__v8df) __C,
11185						     (__mmask8) __U,
11186						     _MM_FROUND_CUR_DIRECTION);
11187}
11188
11189extern __inline __m512
11190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11191_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11192{
11193  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11194						   (__v16sf) __B,
11195						   (__v16sf) __C,
11196						   (__mmask16) -1,
11197						   _MM_FROUND_CUR_DIRECTION);
11198}
11199
11200extern __inline __m512
11201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11202_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11203{
11204  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11205						   (__v16sf) __B,
11206						   (__v16sf) __C,
11207						   (__mmask16) __U,
11208						   _MM_FROUND_CUR_DIRECTION);
11209}
11210
11211extern __inline __m512
11212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11213_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11214{
11215  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
11216						    (__v16sf) __B,
11217						    (__v16sf) __C,
11218						    (__mmask16) __U,
11219						    _MM_FROUND_CUR_DIRECTION);
11220}
11221
11222extern __inline __m512
11223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11224_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11225{
11226  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11227						    (__v16sf) __B,
11228						    (__v16sf) __C,
11229						    (__mmask16) __U,
11230						    _MM_FROUND_CUR_DIRECTION);
11231}
11232
11233extern __inline __m512d
11234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11235_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11236{
11237  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11238						    (__v8df) __B,
11239						    -(__v8df) __C,
11240						    (__mmask8) -1,
11241						    _MM_FROUND_CUR_DIRECTION);
11242}
11243
11244extern __inline __m512d
11245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11246_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11247{
11248  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11249						    (__v8df) __B,
11250						    -(__v8df) __C,
11251						    (__mmask8) __U,
11252						    _MM_FROUND_CUR_DIRECTION);
11253}
11254
11255extern __inline __m512d
11256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11257_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11258{
11259  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
11260						     (__v8df) __B,
11261						     (__v8df) __C,
11262						     (__mmask8) __U,
11263						     _MM_FROUND_CUR_DIRECTION);
11264}
11265
11266extern __inline __m512d
11267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11268_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11269{
11270  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11271						     (__v8df) __B,
11272						     -(__v8df) __C,
11273						     (__mmask8) __U,
11274						     _MM_FROUND_CUR_DIRECTION);
11275}
11276
11277extern __inline __m512
11278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11279_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11280{
11281  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11282						   (__v16sf) __B,
11283						   -(__v16sf) __C,
11284						   (__mmask16) -1,
11285						   _MM_FROUND_CUR_DIRECTION);
11286}
11287
11288extern __inline __m512
11289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11290_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11291{
11292  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11293						   (__v16sf) __B,
11294						   -(__v16sf) __C,
11295						   (__mmask16) __U,
11296						   _MM_FROUND_CUR_DIRECTION);
11297}
11298
11299extern __inline __m512
11300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11301_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11302{
11303  return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
11304						    (__v16sf) __B,
11305						    (__v16sf) __C,
11306						    (__mmask16) __U,
11307						    _MM_FROUND_CUR_DIRECTION);
11308}
11309
11310extern __inline __m512
11311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11312_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11313{
11314  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11315						    (__v16sf) __B,
11316						    -(__v16sf) __C,
11317						    (__mmask16) __U,
11318						    _MM_FROUND_CUR_DIRECTION);
11319}
11320
11321extern __inline __m512d
11322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11323_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
11324{
11325  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11326						       (__v8df) __B,
11327						       (__v8df) __C,
11328						       (__mmask8) -1,
11329						       _MM_FROUND_CUR_DIRECTION);
11330}
11331
11332extern __inline __m512d
11333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11334_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11335{
11336  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11337						       (__v8df) __B,
11338						       (__v8df) __C,
11339						       (__mmask8) __U,
11340						       _MM_FROUND_CUR_DIRECTION);
11341}
11342
11343extern __inline __m512d
11344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11345_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11346{
11347  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
11348							(__v8df) __B,
11349							(__v8df) __C,
11350							(__mmask8) __U,
11351							_MM_FROUND_CUR_DIRECTION);
11352}
11353
11354extern __inline __m512d
11355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11356_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11357{
11358  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11359							(__v8df) __B,
11360							(__v8df) __C,
11361							(__mmask8) __U,
11362							_MM_FROUND_CUR_DIRECTION);
11363}
11364
11365extern __inline __m512
11366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11367_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
11368{
11369  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11370						      (__v16sf) __B,
11371						      (__v16sf) __C,
11372						      (__mmask16) -1,
11373						      _MM_FROUND_CUR_DIRECTION);
11374}
11375
11376extern __inline __m512
11377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11378_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11379{
11380  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11381						      (__v16sf) __B,
11382						      (__v16sf) __C,
11383						      (__mmask16) __U,
11384						      _MM_FROUND_CUR_DIRECTION);
11385}
11386
11387extern __inline __m512
11388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11389_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11390{
11391  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
11392						       (__v16sf) __B,
11393						       (__v16sf) __C,
11394						       (__mmask16) __U,
11395						       _MM_FROUND_CUR_DIRECTION);
11396}
11397
11398extern __inline __m512
11399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11400_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11401{
11402  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11403						       (__v16sf) __B,
11404						       (__v16sf) __C,
11405						       (__mmask16) __U,
11406						       _MM_FROUND_CUR_DIRECTION);
11407}
11408
11409extern __inline __m512d
11410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11411_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
11412{
11413  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11414						       (__v8df) __B,
11415						       -(__v8df) __C,
11416						       (__mmask8) -1,
11417						       _MM_FROUND_CUR_DIRECTION);
11418}
11419
11420extern __inline __m512d
11421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11422_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11423{
11424  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11425						       (__v8df) __B,
11426						       -(__v8df) __C,
11427						       (__mmask8) __U,
11428						       _MM_FROUND_CUR_DIRECTION);
11429}
11430
11431extern __inline __m512d
11432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11433_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11434{
11435  return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
11436							(__v8df) __B,
11437							(__v8df) __C,
11438							(__mmask8) __U,
11439							_MM_FROUND_CUR_DIRECTION);
11440}
11441
11442extern __inline __m512d
11443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11444_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11445{
11446  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11447							(__v8df) __B,
11448							-(__v8df) __C,
11449							(__mmask8) __U,
11450							_MM_FROUND_CUR_DIRECTION);
11451}
11452
11453extern __inline __m512
11454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11455_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
11456{
11457  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11458						      (__v16sf) __B,
11459						      -(__v16sf) __C,
11460						      (__mmask16) -1,
11461						      _MM_FROUND_CUR_DIRECTION);
11462}
11463
11464extern __inline __m512
11465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11466_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11467{
11468  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11469						      (__v16sf) __B,
11470						      -(__v16sf) __C,
11471						      (__mmask16) __U,
11472						      _MM_FROUND_CUR_DIRECTION);
11473}
11474
11475extern __inline __m512
11476__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11477_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11478{
11479  return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
11480						       (__v16sf) __B,
11481						       (__v16sf) __C,
11482						       (__mmask16) __U,
11483						       _MM_FROUND_CUR_DIRECTION);
11484}
11485
11486extern __inline __m512
11487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11488_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11489{
11490  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11491						       (__v16sf) __B,
11492						       -(__v16sf) __C,
11493						       (__mmask16) __U,
11494						       _MM_FROUND_CUR_DIRECTION);
11495}
11496
11497extern __inline __m512d
11498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11499_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11500{
11501  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11502						    (__v8df) __B,
11503						    (__v8df) __C,
11504						    (__mmask8) -1,
11505						    _MM_FROUND_CUR_DIRECTION);
11506}
11507
11508extern __inline __m512d
11509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11510_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11511{
11512  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
11513						     (__v8df) __B,
11514						     (__v8df) __C,
11515						     (__mmask8) __U,
11516						     _MM_FROUND_CUR_DIRECTION);
11517}
11518
11519extern __inline __m512d
11520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11521_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11522{
11523  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
11524						     (__v8df) __B,
11525						     (__v8df) __C,
11526						     (__mmask8) __U,
11527						     _MM_FROUND_CUR_DIRECTION);
11528}
11529
11530extern __inline __m512d
11531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11532_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11533{
11534  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11535						     (__v8df) __B,
11536						     (__v8df) __C,
11537						     (__mmask8) __U,
11538						     _MM_FROUND_CUR_DIRECTION);
11539}
11540
11541extern __inline __m512
11542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11543_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11544{
11545  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11546						   (__v16sf) __B,
11547						   (__v16sf) __C,
11548						   (__mmask16) -1,
11549						   _MM_FROUND_CUR_DIRECTION);
11550}
11551
11552extern __inline __m512
11553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11554_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11555{
11556  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
11557						    (__v16sf) __B,
11558						    (__v16sf) __C,
11559						    (__mmask16) __U,
11560						    _MM_FROUND_CUR_DIRECTION);
11561}
11562
11563extern __inline __m512
11564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11565_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11566{
11567  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
11568						    (__v16sf) __B,
11569						    (__v16sf) __C,
11570						    (__mmask16) __U,
11571						    _MM_FROUND_CUR_DIRECTION);
11572}
11573
11574extern __inline __m512
11575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11576_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11577{
11578  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11579						    (__v16sf) __B,
11580						    (__v16sf) __C,
11581						    (__mmask16) __U,
11582						    _MM_FROUND_CUR_DIRECTION);
11583}
11584
11585extern __inline __m512d
11586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11587_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11588{
11589  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11590						    (__v8df) __B,
11591						    -(__v8df) __C,
11592						    (__mmask8) -1,
11593						    _MM_FROUND_CUR_DIRECTION);
11594}
11595
11596extern __inline __m512d
11597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11598_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11599{
11600  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
11601						     (__v8df) __B,
11602						     (__v8df) __C,
11603						     (__mmask8) __U,
11604						     _MM_FROUND_CUR_DIRECTION);
11605}
11606
11607extern __inline __m512d
11608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11609_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11610{
11611  return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
11612						      (__v8df) __B,
11613						      (__v8df) __C,
11614						      (__mmask8) __U,
11615						      _MM_FROUND_CUR_DIRECTION);
11616}
11617
11618extern __inline __m512d
11619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11620_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11621{
11622  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11623						     (__v8df) __B,
11624						     -(__v8df) __C,
11625						     (__mmask8) __U,
11626						     _MM_FROUND_CUR_DIRECTION);
11627}
11628
11629extern __inline __m512
11630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11631_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11632{
11633  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11634						   (__v16sf) __B,
11635						   -(__v16sf) __C,
11636						   (__mmask16) -1,
11637						   _MM_FROUND_CUR_DIRECTION);
11638}
11639
11640extern __inline __m512
11641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11642_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11643{
11644  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
11645						    (__v16sf) __B,
11646						    (__v16sf) __C,
11647						    (__mmask16) __U,
11648						    _MM_FROUND_CUR_DIRECTION);
11649}
11650
11651extern __inline __m512
11652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11653_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11654{
11655  return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
11656						     (__v16sf) __B,
11657						     (__v16sf) __C,
11658						     (__mmask16) __U,
11659						     _MM_FROUND_CUR_DIRECTION);
11660}
11661
11662extern __inline __m512
11663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11664_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11665{
11666  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11667						    (__v16sf) __B,
11668						    -(__v16sf) __C,
11669						    (__mmask16) __U,
11670						    _MM_FROUND_CUR_DIRECTION);
11671}
11672
11673extern __inline __m256i
11674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11675_mm512_cvttpd_epi32 (__m512d __A)
11676{
11677  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11678						     (__v8si)
11679						     _mm256_undefined_si256 (),
11680						     (__mmask8) -1,
11681						     _MM_FROUND_CUR_DIRECTION);
11682}
11683
11684extern __inline __m256i
11685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11686_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11687{
11688  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11689						     (__v8si) __W,
11690						     (__mmask8) __U,
11691						     _MM_FROUND_CUR_DIRECTION);
11692}
11693
11694extern __inline __m256i
11695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11696_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
11697{
11698  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11699						     (__v8si)
11700						     _mm256_setzero_si256 (),
11701						     (__mmask8) __U,
11702						     _MM_FROUND_CUR_DIRECTION);
11703}
11704
11705extern __inline __m256i
11706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11707_mm512_cvttpd_epu32 (__m512d __A)
11708{
11709  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11710						      (__v8si)
11711						      _mm256_undefined_si256 (),
11712						      (__mmask8) -1,
11713						      _MM_FROUND_CUR_DIRECTION);
11714}
11715
11716extern __inline __m256i
11717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11718_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11719{
11720  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11721						      (__v8si) __W,
11722						      (__mmask8) __U,
11723						      _MM_FROUND_CUR_DIRECTION);
11724}
11725
11726extern __inline __m256i
11727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11728_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
11729{
11730  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11731						      (__v8si)
11732						      _mm256_setzero_si256 (),
11733						      (__mmask8) __U,
11734						      _MM_FROUND_CUR_DIRECTION);
11735}
11736
11737extern __inline __m256i
11738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11739_mm512_cvtpd_epi32 (__m512d __A)
11740{
11741  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11742						    (__v8si)
11743						    _mm256_undefined_si256 (),
11744						    (__mmask8) -1,
11745						    _MM_FROUND_CUR_DIRECTION);
11746}
11747
11748extern __inline __m256i
11749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11750_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11751{
11752  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11753						    (__v8si) __W,
11754						    (__mmask8) __U,
11755						    _MM_FROUND_CUR_DIRECTION);
11756}
11757
11758extern __inline __m256i
11759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11760_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
11761{
11762  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11763						    (__v8si)
11764						    _mm256_setzero_si256 (),
11765						    (__mmask8) __U,
11766						    _MM_FROUND_CUR_DIRECTION);
11767}
11768
11769extern __inline __m256i
11770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11771_mm512_cvtpd_epu32 (__m512d __A)
11772{
11773  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11774						     (__v8si)
11775						     _mm256_undefined_si256 (),
11776						     (__mmask8) -1,
11777						     _MM_FROUND_CUR_DIRECTION);
11778}
11779
11780extern __inline __m256i
11781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11782_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11783{
11784  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11785						     (__v8si) __W,
11786						     (__mmask8) __U,
11787						     _MM_FROUND_CUR_DIRECTION);
11788}
11789
11790extern __inline __m256i
11791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11792_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
11793{
11794  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11795						     (__v8si)
11796						     _mm256_setzero_si256 (),
11797						     (__mmask8) __U,
11798						     _MM_FROUND_CUR_DIRECTION);
11799}
11800
11801extern __inline __m512i
11802__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11803_mm512_cvttps_epi32 (__m512 __A)
11804{
11805  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11806						     (__v16si)
11807						     _mm512_undefined_si512 (),
11808						     (__mmask16) -1,
11809						     _MM_FROUND_CUR_DIRECTION);
11810}
11811
11812extern __inline __m512i
11813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11814_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11815{
11816  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11817						     (__v16si) __W,
11818						     (__mmask16) __U,
11819						     _MM_FROUND_CUR_DIRECTION);
11820}
11821
11822extern __inline __m512i
11823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11824_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
11825{
11826  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11827						     (__v16si)
11828						     _mm512_setzero_si512 (),
11829						     (__mmask16) __U,
11830						     _MM_FROUND_CUR_DIRECTION);
11831}
11832
11833extern __inline __m512i
11834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11835_mm512_cvttps_epu32 (__m512 __A)
11836{
11837  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11838						      (__v16si)
11839						      _mm512_undefined_si512 (),
11840						      (__mmask16) -1,
11841						      _MM_FROUND_CUR_DIRECTION);
11842}
11843
11844extern __inline __m512i
11845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11846_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11847{
11848  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11849						      (__v16si) __W,
11850						      (__mmask16) __U,
11851						      _MM_FROUND_CUR_DIRECTION);
11852}
11853
11854extern __inline __m512i
11855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11856_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
11857{
11858  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11859						      (__v16si)
11860						      _mm512_setzero_si512 (),
11861						      (__mmask16) __U,
11862						      _MM_FROUND_CUR_DIRECTION);
11863}
11864
11865extern __inline __m512i
11866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11867_mm512_cvtps_epi32 (__m512 __A)
11868{
11869  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11870						    (__v16si)
11871						    _mm512_undefined_si512 (),
11872						    (__mmask16) -1,
11873						    _MM_FROUND_CUR_DIRECTION);
11874}
11875
11876extern __inline __m512i
11877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11878_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11879{
11880  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11881						    (__v16si) __W,
11882						    (__mmask16) __U,
11883						    _MM_FROUND_CUR_DIRECTION);
11884}
11885
11886extern __inline __m512i
11887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11888_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
11889{
11890  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11891						    (__v16si)
11892						    _mm512_setzero_si512 (),
11893						    (__mmask16) __U,
11894						    _MM_FROUND_CUR_DIRECTION);
11895}
11896
11897extern __inline __m512i
11898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11899_mm512_cvtps_epu32 (__m512 __A)
11900{
11901  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11902						     (__v16si)
11903						     _mm512_undefined_si512 (),
11904						     (__mmask16) -1,
11905						     _MM_FROUND_CUR_DIRECTION);
11906}
11907
11908extern __inline __m512i
11909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11910_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11911{
11912  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11913						     (__v16si) __W,
11914						     (__mmask16) __U,
11915						     _MM_FROUND_CUR_DIRECTION);
11916}
11917
11918extern __inline __m512i
11919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11920_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
11921{
11922  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11923						     (__v16si)
11924						     _mm512_setzero_si512 (),
11925						     (__mmask16) __U,
11926						     _MM_FROUND_CUR_DIRECTION);
11927}
11928
11929#ifdef __x86_64__
11930extern __inline __m128
11931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11932_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
11933{
11934  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
11935					      _MM_FROUND_CUR_DIRECTION);
11936}
11937
11938extern __inline __m128d
11939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11940_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
11941{
11942  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
11943					       _MM_FROUND_CUR_DIRECTION);
11944}
11945#endif
11946
11947extern __inline __m128
11948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11949_mm_cvtu32_ss (__m128 __A, unsigned __B)
11950{
11951  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
11952					      _MM_FROUND_CUR_DIRECTION);
11953}
11954
11955extern __inline __m512
11956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11957_mm512_cvtepi32_ps (__m512i __A)
11958{
11959  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11960						   (__v16sf)
11961						   _mm512_undefined_ps (),
11962						   (__mmask16) -1,
11963						   _MM_FROUND_CUR_DIRECTION);
11964}
11965
11966extern __inline __m512
11967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11968_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
11969{
11970  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11971						   (__v16sf) __W,
11972						   (__mmask16) __U,
11973						   _MM_FROUND_CUR_DIRECTION);
11974}
11975
11976extern __inline __m512
11977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11978_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
11979{
11980  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11981						   (__v16sf)
11982						   _mm512_setzero_ps (),
11983						   (__mmask16) __U,
11984						   _MM_FROUND_CUR_DIRECTION);
11985}
11986
11987extern __inline __m512
11988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11989_mm512_cvtepu32_ps (__m512i __A)
11990{
11991  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
11992						    (__v16sf)
11993						    _mm512_undefined_ps (),
11994						    (__mmask16) -1,
11995						    _MM_FROUND_CUR_DIRECTION);
11996}
11997
11998extern __inline __m512
11999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12000_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12001{
12002  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12003						    (__v16sf) __W,
12004						    (__mmask16) __U,
12005						    _MM_FROUND_CUR_DIRECTION);
12006}
12007
12008extern __inline __m512
12009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12010_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
12011{
12012  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12013						    (__v16sf)
12014						    _mm512_setzero_ps (),
12015						    (__mmask16) __U,
12016						    _MM_FROUND_CUR_DIRECTION);
12017}
12018
12019#ifdef __OPTIMIZE__
12020extern __inline __m512d
12021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12022_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
12023{
12024  return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12025						      (__v8df) __B,
12026						      (__v8di) __C,
12027						      __imm,
12028						      (__mmask8) -1,
12029						      _MM_FROUND_CUR_DIRECTION);
12030}
12031
12032extern __inline __m512d
12033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12034_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
12035			 __m512i __C, const int __imm)
12036{
12037  return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12038						      (__v8df) __B,
12039						      (__v8di) __C,
12040						      __imm,
12041						      (__mmask8) __U,
12042						      _MM_FROUND_CUR_DIRECTION);
12043}
12044
12045extern __inline __m512d
12046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12047_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
12048			  __m512i __C, const int __imm)
12049{
12050  return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
12051						       (__v8df) __B,
12052						       (__v8di) __C,
12053						       __imm,
12054						       (__mmask8) __U,
12055						       _MM_FROUND_CUR_DIRECTION);
12056}
12057
12058extern __inline __m512
12059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12060_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
12061{
12062  return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12063						     (__v16sf) __B,
12064						     (__v16si) __C,
12065						     __imm,
12066						     (__mmask16) -1,
12067						     _MM_FROUND_CUR_DIRECTION);
12068}
12069
12070extern __inline __m512
12071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12072_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
12073			 __m512i __C, const int __imm)
12074{
12075  return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12076						     (__v16sf) __B,
12077						     (__v16si) __C,
12078						     __imm,
12079						     (__mmask16) __U,
12080						     _MM_FROUND_CUR_DIRECTION);
12081}
12082
12083extern __inline __m512
12084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12085_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
12086			  __m512i __C, const int __imm)
12087{
12088  return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
12089						      (__v16sf) __B,
12090						      (__v16si) __C,
12091						      __imm,
12092						      (__mmask16) __U,
12093						      _MM_FROUND_CUR_DIRECTION);
12094}
12095
12096extern __inline __m128d
12097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12098_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
12099{
12100  return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12101						   (__v2df) __B,
12102						   (__v2di) __C, __imm,
12103						   (__mmask8) -1,
12104						   _MM_FROUND_CUR_DIRECTION);
12105}
12106
12107extern __inline __m128d
12108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12109_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
12110		      __m128i __C, const int __imm)
12111{
12112  return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12113						   (__v2df) __B,
12114						   (__v2di) __C, __imm,
12115						   (__mmask8) __U,
12116						   _MM_FROUND_CUR_DIRECTION);
12117}
12118
12119extern __inline __m128d
12120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12121_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
12122		       __m128i __C, const int __imm)
12123{
12124  return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
12125						    (__v2df) __B,
12126						    (__v2di) __C,
12127						    __imm,
12128						    (__mmask8) __U,
12129						    _MM_FROUND_CUR_DIRECTION);
12130}
12131
12132extern __inline __m128
12133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12134_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
12135{
12136  return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12137						  (__v4sf) __B,
12138						  (__v4si) __C, __imm,
12139						  (__mmask8) -1,
12140						  _MM_FROUND_CUR_DIRECTION);
12141}
12142
12143extern __inline __m128
12144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12145_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
12146		      __m128i __C, const int __imm)
12147{
12148  return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12149						  (__v4sf) __B,
12150						  (__v4si) __C, __imm,
12151						  (__mmask8) __U,
12152						  _MM_FROUND_CUR_DIRECTION);
12153}
12154
12155extern __inline __m128
12156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12157_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
12158		       __m128i __C, const int __imm)
12159{
12160  return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
12161						   (__v4sf) __B,
12162						   (__v4si) __C, __imm,
12163						   (__mmask8) __U,
12164						   _MM_FROUND_CUR_DIRECTION);
12165}
12166#else
/* Macro counterpart of the inline _mm512_fixupimm_pd, selected when
   __OPTIMIZE__ is not defined.  NOTE(review): presumably required so
   the immediate C stays a compile-time constant without inlining --
   confirm.  */
#define _mm512_fixupimm_pd(X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (__v8di) (__m512i) (Z), \
     (int) (C), \
     (__mmask8) (-1), \
     _MM_FROUND_CUR_DIRECTION))
12171
/* Macro counterpart of the inline _mm512_mask_fixupimm_pd, selected
   when __OPTIMIZE__ is not defined; U is passed as the mask.  */
#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (__v8di) (__m512i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
12176
/* Macro counterpart of the inline _mm512_maskz_fixupimm_pd, selected
   when __OPTIMIZE__ is not defined; expands to the maskz builtin with
   U as the mask.  */
#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_maskz ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (__v8di) (__m512i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
12181
/* Macro counterpart of the inline _mm512_fixupimm_ps, selected when
   __OPTIMIZE__ is not defined; the mask is all ones.  */
#define _mm512_fixupimm_ps(X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (__v16si) (__m512i) (Z), \
     (int) (C), \
     (__mmask16) (-1), \
     _MM_FROUND_CUR_DIRECTION))
12186
/* Macro counterpart of the inline _mm512_mask_fixupimm_ps, selected
   when __OPTIMIZE__ is not defined; U is passed as the mask.  */
#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (__v16si) (__m512i) (Z), \
     (int) (C), \
     (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))
12191
/* Macro counterpart of the inline _mm512_maskz_fixupimm_ps, selected
   when __OPTIMIZE__ is not defined; expands to the maskz builtin with
   U as the mask.  */
#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_maskz ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (__v16si) (__m512i) (Z), \
     (int) (C), \
     (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))
12196
/* Macro counterpart of the inline _mm_fixupimm_sd, selected when
   __OPTIMIZE__ is not defined; the mask is all ones.  */
#define _mm_fixupimm_sd(X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (__v2di) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (-1), \
     _MM_FROUND_CUR_DIRECTION))
12201
/* Macro counterpart of the inline _mm_mask_fixupimm_sd, selected when
   __OPTIMIZE__ is not defined; U is passed as the mask.  */
#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (__v2di) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
12206
/* Macro counterpart of the inline _mm_maskz_fixupimm_sd, selected when
   __OPTIMIZE__ is not defined; expands to the maskz builtin with U as
   the mask.  */
#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_maskz ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (__v2di) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
12211
/* Macro counterpart of the inline _mm_fixupimm_ss, selected when
   __OPTIMIZE__ is not defined; the mask is all ones.  */
#define _mm_fixupimm_ss(X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (__v4si) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (-1), \
     _MM_FROUND_CUR_DIRECTION))
12216
/* Macro counterpart of the inline _mm_mask_fixupimm_ss, selected when
   __OPTIMIZE__ is not defined; U is passed as the mask.  */
#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (__v4si) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
12221
/* Macro counterpart of the inline _mm_maskz_fixupimm_ss, selected when
   __OPTIMIZE__ is not defined; expands to the maskz builtin with U as
   the mask.  */
#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_maskz ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (__v4si) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
12226#endif
12227
12228#ifdef __x86_64__
12229extern __inline unsigned long long
12230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12231_mm_cvtss_u64 (__m128 __A)
12232{
12233  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
12234							   __A,
12235							   _MM_FROUND_CUR_DIRECTION);
12236}
12237
12238extern __inline unsigned long long
12239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12240_mm_cvttss_u64 (__m128 __A)
12241{
12242  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
12243							    __A,
12244							    _MM_FROUND_CUR_DIRECTION);
12245}
12246
12247extern __inline long long
12248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12249_mm_cvttss_i64 (__m128 __A)
12250{
12251  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
12252						  _MM_FROUND_CUR_DIRECTION);
12253}
12254#endif /* __x86_64__ */
12255
12256extern __inline unsigned
12257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12258_mm_cvtss_u32 (__m128 __A)
12259{
12260  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
12261						 _MM_FROUND_CUR_DIRECTION);
12262}
12263
12264extern __inline unsigned
12265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12266_mm_cvttss_u32 (__m128 __A)
12267{
12268  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
12269						  _MM_FROUND_CUR_DIRECTION);
12270}
12271
12272extern __inline int
12273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12274_mm_cvttss_i32 (__m128 __A)
12275{
12276  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
12277					    _MM_FROUND_CUR_DIRECTION);
12278}
12279
12280#ifdef __x86_64__
12281extern __inline unsigned long long
12282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12283_mm_cvtsd_u64 (__m128d __A)
12284{
12285  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
12286							   __A,
12287							   _MM_FROUND_CUR_DIRECTION);
12288}
12289
12290extern __inline unsigned long long
12291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12292_mm_cvttsd_u64 (__m128d __A)
12293{
12294  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
12295							    __A,
12296							    _MM_FROUND_CUR_DIRECTION);
12297}
12298
12299extern __inline long long
12300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12301_mm_cvttsd_i64 (__m128d __A)
12302{
12303  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
12304						  _MM_FROUND_CUR_DIRECTION);
12305}
12306#endif /* __x86_64__ */
12307
12308extern __inline unsigned
12309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12310_mm_cvtsd_u32 (__m128d __A)
12311{
12312  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
12313						 _MM_FROUND_CUR_DIRECTION);
12314}
12315
12316extern __inline unsigned
12317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12318_mm_cvttsd_u32 (__m128d __A)
12319{
12320  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
12321						  _MM_FROUND_CUR_DIRECTION);
12322}
12323
12324extern __inline int
12325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12326_mm_cvttsd_i32 (__m128d __A)
12327{
12328  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
12329					    _MM_FROUND_CUR_DIRECTION);
12330}
12331
12332extern __inline __m512d
12333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12334_mm512_cvtps_pd (__m256 __A)
12335{
12336  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12337						    (__v8df)
12338						    _mm512_undefined_pd (),
12339						    (__mmask8) -1,
12340						    _MM_FROUND_CUR_DIRECTION);
12341}
12342
12343extern __inline __m512d
12344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12345_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
12346{
12347  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12348						    (__v8df) __W,
12349						    (__mmask8) __U,
12350						    _MM_FROUND_CUR_DIRECTION);
12351}
12352
12353extern __inline __m512d
12354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12355_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
12356{
12357  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12358						    (__v8df)
12359						    _mm512_setzero_pd (),
12360						    (__mmask8) __U,
12361						    _MM_FROUND_CUR_DIRECTION);
12362}
12363
12364extern __inline __m512
12365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12366_mm512_cvtph_ps (__m256i __A)
12367{
12368  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12369						    (__v16sf)
12370						    _mm512_undefined_ps (),
12371						    (__mmask16) -1,
12372						    _MM_FROUND_CUR_DIRECTION);
12373}
12374
12375extern __inline __m512
12376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12377_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
12378{
12379  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12380						    (__v16sf) __W,
12381						    (__mmask16) __U,
12382						    _MM_FROUND_CUR_DIRECTION);
12383}
12384
12385extern __inline __m512
12386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12387_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
12388{
12389  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12390						    (__v16sf)
12391						    _mm512_setzero_ps (),
12392						    (__mmask16) __U,
12393						    _MM_FROUND_CUR_DIRECTION);
12394}
12395
12396extern __inline __m256
12397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12398_mm512_cvtpd_ps (__m512d __A)
12399{
12400  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12401						   (__v8sf)
12402						   _mm256_undefined_ps (),
12403						   (__mmask8) -1,
12404						   _MM_FROUND_CUR_DIRECTION);
12405}
12406
12407extern __inline __m256
12408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12409_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
12410{
12411  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12412						   (__v8sf) __W,
12413						   (__mmask8) __U,
12414						   _MM_FROUND_CUR_DIRECTION);
12415}
12416
12417extern __inline __m256
12418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12419_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
12420{
12421  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12422						   (__v8sf)
12423						   _mm256_setzero_ps (),
12424						   (__mmask8) __U,
12425						   _MM_FROUND_CUR_DIRECTION);
12426}
12427
12428#ifdef __OPTIMIZE__
12429extern __inline __m512
12430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12431_mm512_getexp_ps (__m512 __A)
12432{
12433  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12434						   (__v16sf)
12435						   _mm512_undefined_ps (),
12436						   (__mmask16) -1,
12437						   _MM_FROUND_CUR_DIRECTION);
12438}
12439
12440extern __inline __m512
12441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12442_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
12443{
12444  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12445						   (__v16sf) __W,
12446						   (__mmask16) __U,
12447						   _MM_FROUND_CUR_DIRECTION);
12448}
12449
12450extern __inline __m512
12451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12452_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
12453{
12454  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12455						   (__v16sf)
12456						   _mm512_setzero_ps (),
12457						   (__mmask16) __U,
12458						   _MM_FROUND_CUR_DIRECTION);
12459}
12460
12461extern __inline __m512d
12462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12463_mm512_getexp_pd (__m512d __A)
12464{
12465  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12466						    (__v8df)
12467						    _mm512_undefined_pd (),
12468						    (__mmask8) -1,
12469						    _MM_FROUND_CUR_DIRECTION);
12470}
12471
12472extern __inline __m512d
12473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12474_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
12475{
12476  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12477						    (__v8df) __W,
12478						    (__mmask8) __U,
12479						    _MM_FROUND_CUR_DIRECTION);
12480}
12481
12482extern __inline __m512d
12483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12484_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
12485{
12486  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12487						    (__v8df)
12488						    _mm512_setzero_pd (),
12489						    (__mmask8) __U,
12490						    _MM_FROUND_CUR_DIRECTION);
12491}
12492
12493extern __inline __m128
12494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12495_mm_getexp_ss (__m128 __A, __m128 __B)
12496{
12497  return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
12498						    (__v4sf) __B,
12499						    _MM_FROUND_CUR_DIRECTION);
12500}
12501
12502extern __inline __m128d
12503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12504_mm_getexp_sd (__m128d __A, __m128d __B)
12505{
12506  return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
12507						     (__v2df) __B,
12508						     _MM_FROUND_CUR_DIRECTION);
12509}
12510
12511extern __inline __m512d
12512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12513_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
12514		   _MM_MANTISSA_SIGN_ENUM __C)
12515{
12516  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12517						     (__C << 2) | __B,
12518						     _mm512_undefined_pd (),
12519						     (__mmask8) -1,
12520						     _MM_FROUND_CUR_DIRECTION);
12521}
12522
12523extern __inline __m512d
12524__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12525_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
12526			_MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12527{
12528  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12529						     (__C << 2) | __B,
12530						     (__v8df) __W, __U,
12531						     _MM_FROUND_CUR_DIRECTION);
12532}
12533
12534extern __inline __m512d
12535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12536_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
12537			 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12538{
12539  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12540						     (__C << 2) | __B,
12541						     (__v8df)
12542						     _mm512_setzero_pd (),
12543						     __U,
12544						     _MM_FROUND_CUR_DIRECTION);
12545}
12546
12547extern __inline __m512
12548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12549_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
12550		   _MM_MANTISSA_SIGN_ENUM __C)
12551{
12552  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12553						    (__C << 2) | __B,
12554						    _mm512_undefined_ps (),
12555						    (__mmask16) -1,
12556						    _MM_FROUND_CUR_DIRECTION);
12557}
12558
12559extern __inline __m512
12560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12561_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
12562			_MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12563{
12564  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12565						    (__C << 2) | __B,
12566						    (__v16sf) __W, __U,
12567						    _MM_FROUND_CUR_DIRECTION);
12568}
12569
12570extern __inline __m512
12571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12572_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
12573			 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12574{
12575  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12576						    (__C << 2) | __B,
12577						    (__v16sf)
12578						    _mm512_setzero_ps (),
12579						    __U,
12580						    _MM_FROUND_CUR_DIRECTION);
12581}
12582
12583extern __inline __m128d
12584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12585_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
12586		_MM_MANTISSA_SIGN_ENUM __D)
12587{
12588  return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
12589						   (__v2df) __B,
12590						   (__D << 2) | __C,
12591						   _MM_FROUND_CUR_DIRECTION);
12592}
12593
12594extern __inline __m128
12595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12596_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
12597		_MM_MANTISSA_SIGN_ENUM __D)
12598{
12599  return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
12600						  (__v4sf) __B,
12601						  (__D << 2) | __C,
12602						  _MM_FROUND_CUR_DIRECTION);
12603}
12604
12605#else
/* Macro form of _mm512_getmant_pd, selected when __OPTIMIZE__ is not
   defined.  The immediate is (C << 2) | B.  */
#define _mm512_getmant_pd(X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) (__m512d) (X), \
                                               (int) (((C) << 2) | (B)), \
                                               (__v8df) _mm512_undefined_pd (), \
                                               (__mmask8) -1, \
                                               _MM_FROUND_CUR_DIRECTION))
12612
/* Macro form of _mm512_mask_getmant_pd, selected when __OPTIMIZE__ is not
   defined.  W is the second vector operand, U the mask.  */
#define _mm512_mask_getmant_pd(W, U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) (__m512d) (X), \
                                               (int) (((C) << 2) | (B)), \
                                               (__v8df) (__m512d) (W), \
                                               (__mmask8) (U), \
                                               _MM_FROUND_CUR_DIRECTION))
12619
/* Macro form of _mm512_maskz_getmant_pd, selected when __OPTIMIZE__ is not
   defined.  The second vector operand comes from _mm512_setzero_pd ().  */
#define _mm512_maskz_getmant_pd(U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) (__m512d) (X), \
                                               (int) (((C) << 2) | (B)), \
                                               (__v8df) _mm512_setzero_pd (), \
                                               (__mmask8) (U), \
                                               _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_getmant_ps, selected when __OPTIMIZE__ is not
   defined.  The immediate is (C << 2) | B.  */
#define _mm512_getmant_ps(X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf) (__m512) (X), \
                                              (int) (((C) << 2) | (B)), \
                                              (__v16sf) _mm512_undefined_ps (), \
                                              (__mmask16) -1, \
                                              _MM_FROUND_CUR_DIRECTION))
12632
/* Macro form of _mm512_mask_getmant_ps, selected when __OPTIMIZE__ is not
   defined.  W is the second vector operand, U the mask.  */
#define _mm512_mask_getmant_ps(W, U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf) (__m512) (X), \
                                              (int) (((C) << 2) | (B)), \
                                              (__v16sf) (__m512) (W), \
                                              (__mmask16) (U), \
                                              _MM_FROUND_CUR_DIRECTION))
12639
/* Macro form of _mm512_maskz_getmant_ps, selected when __OPTIMIZE__ is not
   defined.  The second vector operand comes from _mm512_setzero_ps ().  */
#define _mm512_maskz_getmant_ps(U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf) (__m512) (X), \
                                              (int) (((C) << 2) | (B)), \
                                              (__v16sf) _mm512_setzero_ps (), \
                                              (__mmask16) (U), \
                                              _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_getmant_sd, selected when __OPTIMIZE__ is not defined.
   The immediate is (D << 2) | C.  */
#define _mm_getmant_sd(X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_round ((__v2df) (__m128d) (X), \
                                             (__v2df) (__m128d) (Y), \
                                             (int) (((D) << 2) | (C)), \
                                             _MM_FROUND_CUR_DIRECTION))
12651
/* Macro form of _mm_getmant_ss, selected when __OPTIMIZE__ is not defined.
   The immediate is (D << 2) | C.  */
#define _mm_getmant_ss(X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_round ((__v4sf) (__m128) (X), \
                                            (__v4sf) (__m128) (Y), \
                                            (int) (((D) << 2) | (C)), \
                                            _MM_FROUND_CUR_DIRECTION))
12657
/* Macro form of _mm_getexp_ss, selected when __OPTIMIZE__ is not defined.
   It names the same three-operand builtin as the inline definition
   (__builtin_ia32_getexpss128_round) so both build modes expand to the
   same call.  */
#define _mm_getexp_ss(A, B) \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf) (__m128) (A), \
                                              (__v4sf) (__m128) (B), \
                                              _MM_FROUND_CUR_DIRECTION))
12661
/* Macro form of _mm_getexp_sd, selected when __OPTIMIZE__ is not defined.
   It names the same three-operand builtin as the inline definition
   (__builtin_ia32_getexpsd128_round) so both build modes expand to the
   same call.  */
#define _mm_getexp_sd(A, B) \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df) (__m128d) (A), \
                                               (__v2df) (__m128d) (B), \
                                               _MM_FROUND_CUR_DIRECTION))
12665
/* Macro form of _mm512_getexp_ps, selected when __OPTIMIZE__ is not
   defined.  */
#define _mm512_getexp_ps(A) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
                                             (__v16sf) _mm512_undefined_ps (), \
                                             (__mmask16) -1, \
                                             _MM_FROUND_CUR_DIRECTION))
12669
/* Macro form of _mm512_mask_getexp_ps, selected when __OPTIMIZE__ is not
   defined.  W is the second vector operand, U the mask.  */
#define _mm512_mask_getexp_ps(W, U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
                                             (__v16sf) (__m512) (W), \
                                             (__mmask16) (U), \
                                             _MM_FROUND_CUR_DIRECTION))
12673
/* Macro form of _mm512_maskz_getexp_ps, selected when __OPTIMIZE__ is not
   defined.  The second vector operand comes from _mm512_setzero_ps ().  */
#define _mm512_maskz_getexp_ps(U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
                                             (__v16sf) _mm512_setzero_ps (), \
                                             (__mmask16) (U), \
                                             _MM_FROUND_CUR_DIRECTION))
12677
/* Macro form of _mm512_getexp_pd, selected when __OPTIMIZE__ is not
   defined.  */
#define _mm512_getexp_pd(A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
                                              (__v8df) _mm512_undefined_pd (), \
                                              (__mmask8) -1, \
                                              _MM_FROUND_CUR_DIRECTION))
12681
/* Macro form of _mm512_mask_getexp_pd, selected when __OPTIMIZE__ is not
   defined.  W is the second vector operand, U the mask.  */
#define _mm512_mask_getexp_pd(W, U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
                                              (__v8df) (__m512d) (W), \
                                              (__mmask8) (U), \
                                              _MM_FROUND_CUR_DIRECTION))
12685
/* Macro form of _mm512_maskz_getexp_pd, selected when __OPTIMIZE__ is not
   defined.  The second vector operand comes from _mm512_setzero_pd ().  */
#define _mm512_maskz_getexp_pd(U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
                                              (__v8df) _mm512_setzero_pd (), \
                                              (__mmask8) (U), \
                                              _MM_FROUND_CUR_DIRECTION))
12689#endif
12690
12691#ifdef __OPTIMIZE__
12692extern __inline __m512
12693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12694_mm512_roundscale_ps (__m512 __A, const int __imm)
12695{
12696  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
12697						  (__v16sf)
12698						  _mm512_undefined_ps (),
12699						  -1,
12700						  _MM_FROUND_CUR_DIRECTION);
12701}
12702
12703extern __inline __m512
12704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12705_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
12706			   const int __imm)
12707{
12708  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
12709						  (__v16sf) __A,
12710						  (__mmask16) __B,
12711						  _MM_FROUND_CUR_DIRECTION);
12712}
12713
12714extern __inline __m512
12715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12716_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
12717{
12718  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
12719						  __imm,
12720						  (__v16sf)
12721						  _mm512_setzero_ps (),
12722						  (__mmask16) __A,
12723						  _MM_FROUND_CUR_DIRECTION);
12724}
12725
12726extern __inline __m512d
12727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12728_mm512_roundscale_pd (__m512d __A, const int __imm)
12729{
12730  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
12731						   (__v8df)
12732						   _mm512_undefined_pd (),
12733						   -1,
12734						   _MM_FROUND_CUR_DIRECTION);
12735}
12736
12737extern __inline __m512d
12738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12739_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
12740			   const int __imm)
12741{
12742  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
12743						   (__v8df) __A,
12744						   (__mmask8) __B,
12745						   _MM_FROUND_CUR_DIRECTION);
12746}
12747
12748extern __inline __m512d
12749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12750_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
12751{
12752  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
12753						   __imm,
12754						   (__v8df)
12755						   _mm512_setzero_pd (),
12756						   (__mmask8) __A,
12757						   _MM_FROUND_CUR_DIRECTION);
12758}
12759
12760extern __inline __m128
12761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12762_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
12763{
12764  return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
12765						   (__v4sf) __B, __imm,
12766						   _MM_FROUND_CUR_DIRECTION);
12767}
12768
12769extern __inline __m128d
12770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12771_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
12772{
12773  return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
12774						    (__v2df) __B, __imm,
12775						   _MM_FROUND_CUR_DIRECTION);
12776}
12777
12778#else
/* Macro form of _mm512_roundscale_ps, selected when __OPTIMIZE__ is not
   defined.  B is the immediate.  */
#define _mm512_roundscale_ps(A, B) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (A), \
                                            (int) (B), \
                                            (__v16sf) _mm512_undefined_ps (), \
                                            (__mmask16) (-1), \
                                            _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_roundscale_ps, selected when __OPTIMIZE__ is
   not defined.  C is the source, D the immediate, A the vector operand
   after it, B the mask.  */
#define _mm512_mask_roundscale_ps(A, B, C, D) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (C), \
                                            (int) (D), \
                                            (__v16sf) (__m512) (A), \
                                            (__mmask16) (B), \
                                            _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_maskz_roundscale_ps, selected when __OPTIMIZE__ is
   not defined.  B is the source, C the immediate, A the mask.  */
#define _mm512_maskz_roundscale_ps(A, B, C) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (B), \
                                            (int) (C), \
                                            (__v16sf) _mm512_setzero_ps (), \
                                            (__mmask16) (A), \
                                            _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_roundscale_pd, selected when __OPTIMIZE__ is not
   defined.  B is the immediate.  */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (A), \
                                             (int) (B), \
                                             (__v8df) _mm512_undefined_pd (), \
                                             (__mmask8) (-1), \
                                             _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_roundscale_pd, selected when __OPTIMIZE__ is
   not defined.  C is the source, D the immediate, A the vector operand
   after it, B the mask.  */
#define _mm512_mask_roundscale_pd(A, B, C, D) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (C), \
                                             (int) (D), \
                                             (__v8df) (__m512d) (A), \
                                             (__mmask8) (B), \
                                             _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_maskz_roundscale_pd, selected when __OPTIMIZE__ is
   not defined.  B is the source, C the immediate, A the mask.  */
#define _mm512_maskz_roundscale_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (B), \
                                             (int) (C), \
                                             (__v8df) _mm512_setzero_pd (), \
                                             (__mmask8) (A), \
                                             _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_roundscale_ss, selected when __OPTIMIZE__ is not
   defined.  C is the immediate.  */
#define _mm_roundscale_ss(A, B, C) \
  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf) (__m128) (A), \
                                             (__v4sf) (__m128) (B), \
                                             (int) (C), \
                                             _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_roundscale_sd, selected when __OPTIMIZE__ is not
   defined.  C is the immediate.  */
#define _mm_roundscale_sd(A, B, C) \
  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df) (__m128d) (A), \
                                              (__v2df) (__m128d) (B), \
                                              (int) (C), \
                                              _MM_FROUND_CUR_DIRECTION))
12811#endif
12812
12813#ifdef __OPTIMIZE__
12814extern __inline __mmask8
12815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12816_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
12817{
12818  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12819						  (__v8df) __Y, __P,
12820						  (__mmask8) -1,
12821						  _MM_FROUND_CUR_DIRECTION);
12822}
12823
12824extern __inline __mmask16
12825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12826_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
12827{
12828  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12829						   (__v16sf) __Y, __P,
12830						   (__mmask16) -1,
12831						   _MM_FROUND_CUR_DIRECTION);
12832}
12833
12834extern __inline __mmask16
12835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12836_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
12837{
12838  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12839						   (__v16sf) __Y, __P,
12840						   (__mmask16) __U,
12841						   _MM_FROUND_CUR_DIRECTION);
12842}
12843
12844extern __inline __mmask8
12845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12846_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
12847{
12848  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12849						  (__v8df) __Y, __P,
12850						  (__mmask8) __U,
12851						  _MM_FROUND_CUR_DIRECTION);
12852}
12853
12854extern __inline __mmask8
12855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12856_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
12857{
12858  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12859					       (__v2df) __Y, __P,
12860					       (__mmask8) -1,
12861					       _MM_FROUND_CUR_DIRECTION);
12862}
12863
12864extern __inline __mmask8
12865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12866_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
12867{
12868  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12869					       (__v2df) __Y, __P,
12870					       (__mmask8) __M,
12871					       _MM_FROUND_CUR_DIRECTION);
12872}
12873
12874extern __inline __mmask8
12875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12876_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
12877{
12878  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12879					       (__v4sf) __Y, __P,
12880					       (__mmask8) -1,
12881					       _MM_FROUND_CUR_DIRECTION);
12882}
12883
12884extern __inline __mmask8
12885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12886_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
12887{
12888  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12889					       (__v4sf) __Y, __P,
12890					       (__mmask8) __M,
12891					       _MM_FROUND_CUR_DIRECTION);
12892}
12893
12894#else
/* Macro form of _mm512_cmp_pd_mask, selected when __OPTIMIZE__ is not
   defined.  P is the predicate immediate.  */
#define _mm512_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X), \
                                            (__v8df) (__m512d) (Y), \
                                            (int) (P), \
                                            (__mmask8) -1, \
                                            _MM_FROUND_CUR_DIRECTION))
12899
/* Macro form of _mm512_cmp_ps_mask, selected when __OPTIMIZE__ is not
   defined.  P is the predicate immediate.  */
#define _mm512_cmp_ps_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X), \
                                             (__v16sf) (__m512) (Y), \
                                             (int) (P), \
                                             (__mmask16) -1, \
                                             _MM_FROUND_CUR_DIRECTION))
12904
/* Macro form of _mm512_mask_cmp_pd_mask, selected when __OPTIMIZE__ is not
   defined.  M is the input mask and P the predicate immediate.  M is
   parenthesized before the cast so that an expression argument such as
   `m >> 8' is converted as a whole rather than only its first operand.  */
#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X), \
                                            (__v8df) (__m512d) (Y), \
                                            (int) (P), \
                                            (__mmask8) (M), \
                                            _MM_FROUND_CUR_DIRECTION))
12909
/* Macro form of _mm512_mask_cmp_ps_mask, selected when __OPTIMIZE__ is not
   defined.  M is the input mask and P the predicate immediate.  M is
   parenthesized before the cast so that an expression argument such as
   `m >> 8' is converted as a whole rather than only its first operand.  */
#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X), \
                                             (__v16sf) (__m512) (Y), \
                                             (int) (P), \
                                             (__mmask16) (M), \
                                             _MM_FROUND_CUR_DIRECTION))
12914
/* Macro form of _mm_cmp_sd_mask, selected when __OPTIMIZE__ is not
   defined.  P is the predicate immediate.  */
#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                                         (__v2df) (__m128d) (Y), \
                                         (int) (P), \
                                         (__mmask8) -1, \
                                         _MM_FROUND_CUR_DIRECTION))
12919
/* Macro form of _mm_mask_cmp_sd_mask, selected when __OPTIMIZE__ is not
   defined.  M is the input mask and P the predicate immediate.  M is
   parenthesized and converted to __mmask8, matching the cast applied by
   the inline definition.  */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                                         (__v2df) (__m128d) (Y), \
                                         (int) (P), \
                                         (__mmask8) (M), \
                                         _MM_FROUND_CUR_DIRECTION))
12924
/* Macro form of _mm_cmp_ss_mask, selected when __OPTIMIZE__ is not
   defined.  P is the predicate immediate.  */
#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                                         (__v4sf) (__m128) (Y), \
                                         (int) (P), \
                                         (__mmask8) -1, \
                                         _MM_FROUND_CUR_DIRECTION))
12929
/* Macro form of _mm_mask_cmp_ss_mask, selected when __OPTIMIZE__ is not
   defined.  M is the input mask and P the predicate immediate.  M is
   parenthesized and converted to __mmask8, matching the cast applied by
   the inline definition.  */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                                         (__v4sf) (__m128) (Y), \
                                         (int) (P), \
                                         (__mmask8) (M), \
                                         _MM_FROUND_CUR_DIRECTION))
12934#endif
12935
12936extern __inline __mmask16
12937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12938_mm512_kmov (__mmask16 __A)
12939{
12940  return __builtin_ia32_kmov16 (__A);
12941}
12942
12943extern __inline __m512
12944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12945_mm512_castpd_ps (__m512d __A)
12946{
12947  return (__m512) (__A);
12948}
12949
12950extern __inline __m512i
12951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12952_mm512_castpd_si512 (__m512d __A)
12953{
12954  return (__m512i) (__A);
12955}
12956
12957extern __inline __m512d
12958__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12959_mm512_castps_pd (__m512 __A)
12960{
12961  return (__m512d) (__A);
12962}
12963
12964extern __inline __m512i
12965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12966_mm512_castps_si512 (__m512 __A)
12967{
12968  return (__m512i) (__A);
12969}
12970
12971extern __inline __m512
12972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12973_mm512_castsi512_ps (__m512i __A)
12974{
12975  return (__m512) (__A);
12976}
12977
12978extern __inline __m512d
12979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12980_mm512_castsi512_pd (__m512i __A)
12981{
12982  return (__m512d) (__A);
12983}
12984
12985extern __inline __m128d
12986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12987_mm512_castpd512_pd128 (__m512d __A)
12988{
12989  return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
12990}
12991
12992extern __inline __m128
12993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12994_mm512_castps512_ps128 (__m512 __A)
12995{
12996  return _mm512_extractf32x4_ps(__A, 0);
12997}
12998
12999extern __inline __m128i
13000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13001_mm512_castsi512_si128 (__m512i __A)
13002{
13003  return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
13004}
13005
13006extern __inline __m256d
13007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13008_mm512_castpd512_pd256 (__m512d __A)
13009{
13010  return _mm512_extractf64x4_pd(__A, 0);
13011}
13012
13013extern __inline __m256
13014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13015_mm512_castps512_ps256 (__m512 __A)
13016{
13017  return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
13018}
13019
13020extern __inline __m256i
13021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13022_mm512_castsi512_si256 (__m512i __A)
13023{
13024  return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
13025}
13026
13027extern __inline __m512d
13028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13029_mm512_castpd128_pd512 (__m128d __A)
13030{
13031  return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
13032}
13033
13034extern __inline __m512
13035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13036_mm512_castps128_ps512 (__m128 __A)
13037{
13038  return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
13039}
13040
13041extern __inline __m512i
13042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13043_mm512_castsi128_si512 (__m128i __A)
13044{
13045  return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
13046}
13047
13048extern __inline __m512d
13049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13050_mm512_castpd256_pd512 (__m256d __A)
13051{
13052  return __builtin_ia32_pd512_256pd (__A);
13053}
13054
13055extern __inline __m512
13056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13057_mm512_castps256_ps512 (__m256 __A)
13058{
13059  return __builtin_ia32_ps512_256ps (__A);
13060}
13061
13062extern __inline __m512i
13063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13064_mm512_castsi256_si512 (__m256i __A)
13065{
13066  return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
13067}
13068
13069extern __inline __mmask16
13070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13071_mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
13072{
13073  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13074						     (__v16si) __B, 0,
13075						     (__mmask16) -1);
13076}
13077
13078extern __inline __mmask16
13079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13080_mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13081{
13082  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13083						     (__v16si) __B, 0, __U);
13084}
13085
13086extern __inline __mmask8
13087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13088_mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13089{
13090  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13091						    (__v8di) __B, 0, __U);
13092}
13093
13094extern __inline __mmask8
13095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13096_mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
13097{
13098  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13099						    (__v8di) __B, 0,
13100						    (__mmask8) -1);
13101}
13102
13103extern __inline __mmask16
13104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13105_mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
13106{
13107  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13108						     (__v16si) __B, 6,
13109						     (__mmask16) -1);
13110}
13111
13112extern __inline __mmask16
13113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13114_mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13115{
13116  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13117						     (__v16si) __B, 6,  __U);
13118}
13119
13120extern __inline __mmask8
13121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13122_mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13123{
13124  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13125						    (__v8di) __B, 6, __U);
13126}
13127
13128extern __inline __mmask8
13129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13130_mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
13131{
13132  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13133						    (__v8di) __B, 6,
13134						    (__mmask8) -1);
13135}
13136
/* Undo the target override set up near the top of this header: if AVX512F
   was not already enabled there, the options were pushed and
   __DISABLE_AVX512F__ was defined, so drop the marker and pop the saved
   options here.  */
#ifdef __DISABLE_AVX512F__
#undef __DISABLE_AVX512F__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512F__ */
13141
13142#endif /* _AVX512FINTRIN_H_INCLUDED */
13143