1/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
2
3   This file is part of GCC.
4
5   GCC is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 3, or (at your option)
8   any later version.
9
10   GCC is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   Under Section 7 of GPL version 3, you are granted additional
16   permissions described in the GCC Runtime Library Exception, version
17   3.1, as published by the Free Software Foundation.
18
19   You should have received a copy of the GNU General Public License and
20   a copy of the GCC Runtime Library Exception along with this program;
21   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22   <http://www.gnu.org/licenses/>.  */
23
/* This header is only meant to be reached through <immintrin.h>: if
   _IMMINTRIN_H_INCLUDED is not defined, stop compilation with an error
   telling the user which header to include instead.  */
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
#endif
27
28#ifndef _AVX512VLINTRIN_H_INCLUDED
29#define _AVX512VLINTRIN_H_INCLUDED
30
31/* Doesn't require avx512vl target and is used in avx512dqintrin.h.  */
32extern __inline __m128i
33__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
34_mm_setzero_di (void)
35{
36  return __extension__ (__m128i)(__v2di){ 0LL, 0LL};
37}
38
/* When AVX512VL is not already enabled for this translation unit, save the
   current target options and enable the avx512vl target for the
   definitions that follow.  __DISABLE_AVX512VL__ records that this was
   done; presumably a matching pop_options later in the file is keyed off
   it (not visible in this portion).  */
#ifndef __AVX512VL__
#pragma GCC push_options
#pragma GCC target("avx512vl")
#define __DISABLE_AVX512VL__
#endif /* __AVX512VL__ */
44
/* Internal data types for implementing the intrinsics.  __mmask32 is a
   plain unsigned int; the wrappers visible in this portion of the file
   only take __mmask8 / __mmask16 masks and do not use it.  */
typedef unsigned int __mmask32;
47
48extern __inline __m256d
49__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
50_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
51{
52  return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
53						  (__v4df) __W,
54						  (__mmask8) __U);
55}
56
57extern __inline __m256d
58__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
59_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
60{
61  return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
62						  (__v4df)
63						  _mm256_setzero_pd (),
64						  (__mmask8) __U);
65}
66
67extern __inline __m128d
68__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
69_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
70{
71  return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
72						  (__v2df) __W,
73						  (__mmask8) __U);
74}
75
76extern __inline __m128d
77__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
78_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
79{
80  return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
81						  (__v2df)
82						  _mm_setzero_pd (),
83						  (__mmask8) __U);
84}
85
86extern __inline __m256d
87__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
88_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
89{
90  return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
91						   (__v4df) __W,
92						   (__mmask8) __U);
93}
94
95extern __inline __m256d
96__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
97_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
98{
99  return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
100						   (__v4df)
101						   _mm256_setzero_pd (),
102						   (__mmask8) __U);
103}
104
105extern __inline __m128d
106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
107_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
108{
109  return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
110						   (__v2df) __W,
111						   (__mmask8) __U);
112}
113
114extern __inline __m128d
115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
116_mm_maskz_load_pd (__mmask8 __U, void const *__P)
117{
118  return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
119						   (__v2df)
120						   _mm_setzero_pd (),
121						   (__mmask8) __U);
122}
123
124extern __inline void
125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
127{
128  __builtin_ia32_storeapd256_mask ((__v4df *) __P,
129				   (__v4df) __A,
130				   (__mmask8) __U);
131}
132
133extern __inline void
134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
135_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
136{
137  __builtin_ia32_storeapd128_mask ((__v2df *) __P,
138				   (__v2df) __A,
139				   (__mmask8) __U);
140}
141
142extern __inline __m256
143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
144_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
145{
146  return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
147						 (__v8sf) __W,
148						 (__mmask8) __U);
149}
150
151extern __inline __m256
152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
153_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
154{
155  return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
156						 (__v8sf)
157						 _mm256_setzero_ps (),
158						 (__mmask8) __U);
159}
160
161extern __inline __m128
162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
163_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
164{
165  return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
166						 (__v4sf) __W,
167						 (__mmask8) __U);
168}
169
170extern __inline __m128
171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
172_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
173{
174  return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
175						 (__v4sf)
176						 _mm_setzero_ps (),
177						 (__mmask8) __U);
178}
179
180extern __inline __m256
181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
182_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
183{
184  return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
185						  (__v8sf) __W,
186						  (__mmask8) __U);
187}
188
189extern __inline __m256
190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
191_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
192{
193  return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
194						  (__v8sf)
195						  _mm256_setzero_ps (),
196						  (__mmask8) __U);
197}
198
199extern __inline __m128
200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
202{
203  return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
204						  (__v4sf) __W,
205						  (__mmask8) __U);
206}
207
208extern __inline __m128
209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
210_mm_maskz_load_ps (__mmask8 __U, void const *__P)
211{
212  return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
213						  (__v4sf)
214						  _mm_setzero_ps (),
215						  (__mmask8) __U);
216}
217
218extern __inline void
219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
220_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
221{
222  __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
223				   (__v8sf) __A,
224				   (__mmask8) __U);
225}
226
227extern __inline void
228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
229_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
230{
231  __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
232				   (__v4sf) __A,
233				   (__mmask8) __U);
234}
235
236extern __inline __m256i
237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
238_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
239{
240  return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
241						     (__v4di) __W,
242						     (__mmask8) __U);
243}
244
245extern __inline __m256i
246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
247_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
248{
249  return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
250						     (__v4di)
251						     _mm256_setzero_si256 (),
252						     (__mmask8) __U);
253}
254
255extern __inline __m128i
256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
257_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
258{
259  return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
260						     (__v2di) __W,
261						     (__mmask8) __U);
262}
263
264extern __inline __m128i
265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
266_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
267{
268  return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
269						     (__v2di)
270						     _mm_setzero_di (),
271						     (__mmask8) __U);
272}
273
274extern __inline __m256i
275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
276_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
277{
278  return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
279							(__v4di) __W,
280							(__mmask8)
281							__U);
282}
283
284extern __inline __m256i
285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
286_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
287{
288  return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
289							(__v4di)
290							_mm256_setzero_si256 (),
291							(__mmask8)
292							__U);
293}
294
295extern __inline __m128i
296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
297_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
298{
299  return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
300							(__v2di) __W,
301							(__mmask8)
302							__U);
303}
304
305extern __inline __m128i
306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
307_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
308{
309  return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
310							(__v2di)
311							_mm_setzero_di (),
312							(__mmask8)
313							__U);
314}
315
316extern __inline void
317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
318_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
319{
320  __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
321					(__v4di) __A,
322					(__mmask8) __U);
323}
324
325extern __inline void
326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
328{
329  __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
330					(__v2di) __A,
331					(__mmask8) __U);
332}
333
334extern __inline __m256i
335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
336_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
337{
338  return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
339						     (__v8si) __W,
340						     (__mmask8) __U);
341}
342
343extern __inline __m256i
344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
345_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
346{
347  return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
348						     (__v8si)
349						     _mm256_setzero_si256 (),
350						     (__mmask8) __U);
351}
352
353extern __inline __m128i
354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
355_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
356{
357  return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
358						     (__v4si) __W,
359						     (__mmask8) __U);
360}
361
362extern __inline __m128i
363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
364_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
365{
366  return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
367						     (__v4si)
368						     _mm_setzero_si128 (),
369						     (__mmask8) __U);
370}
371
372extern __inline __m256i
373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
374_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
375{
376  return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
377							(__v8si) __W,
378							(__mmask8)
379							__U);
380}
381
382extern __inline __m256i
383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
384_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
385{
386  return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
387							(__v8si)
388							_mm256_setzero_si256 (),
389							(__mmask8)
390							__U);
391}
392
393extern __inline __m128i
394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
396{
397  return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
398							(__v4si) __W,
399							(__mmask8)
400							__U);
401}
402
403extern __inline __m128i
404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
405_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
406{
407  return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
408							(__v4si)
409							_mm_setzero_si128 (),
410							(__mmask8)
411							__U);
412}
413
414extern __inline void
415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
416_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
417{
418  __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
419					(__v8si) __A,
420					(__mmask8) __U);
421}
422
423extern __inline void
424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
425_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
426{
427  __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
428					(__v4si) __A,
429					(__mmask8) __U);
430}
431
432extern __inline __m128i
433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
434_mm_setzero_hi (void)
435{
436  return __extension__ (__m128i) (__v8hi)
437  {
438  0, 0, 0, 0, 0, 0, 0, 0};
439}
440
441extern __inline __m128d
442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
443_mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
444{
445  return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
446						 (__v2df) __B,
447						 (__v2df) __W,
448						 (__mmask8) __U);
449}
450
451extern __inline __m128d
452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
453_mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
454{
455  return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
456						 (__v2df) __B,
457						 (__v2df)
458						 _mm_setzero_pd (),
459						 (__mmask8) __U);
460}
461
462extern __inline __m256d
463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
464_mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
465		    __m256d __B)
466{
467  return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
468						 (__v4df) __B,
469						 (__v4df) __W,
470						 (__mmask8) __U);
471}
472
473extern __inline __m256d
474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
475_mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
476{
477  return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
478						 (__v4df) __B,
479						 (__v4df)
480						 _mm256_setzero_pd (),
481						 (__mmask8) __U);
482}
483
484extern __inline __m128
485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
486_mm_mask_add_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
487{
488  return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
489						(__v4sf) __B,
490						(__v4sf) __W,
491						(__mmask8) __U);
492}
493
494extern __inline __m128
495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
496_mm_maskz_add_ps (__mmask16 __U, __m128 __A, __m128 __B)
497{
498  return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
499						(__v4sf) __B,
500						(__v4sf)
501						_mm_setzero_ps (),
502						(__mmask8) __U);
503}
504
505extern __inline __m256
506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
507_mm256_mask_add_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
508{
509  return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
510						(__v8sf) __B,
511						(__v8sf) __W,
512						(__mmask8) __U);
513}
514
515extern __inline __m256
516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
517_mm256_maskz_add_ps (__mmask16 __U, __m256 __A, __m256 __B)
518{
519  return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
520						(__v8sf) __B,
521						(__v8sf)
522						_mm256_setzero_ps (),
523						(__mmask8) __U);
524}
525
526extern __inline __m128d
527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
528_mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
529{
530  return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
531						 (__v2df) __B,
532						 (__v2df) __W,
533						 (__mmask8) __U);
534}
535
536extern __inline __m128d
537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
538_mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
539{
540  return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
541						 (__v2df) __B,
542						 (__v2df)
543						 _mm_setzero_pd (),
544						 (__mmask8) __U);
545}
546
547extern __inline __m256d
548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
549_mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
550		    __m256d __B)
551{
552  return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
553						 (__v4df) __B,
554						 (__v4df) __W,
555						 (__mmask8) __U);
556}
557
558extern __inline __m256d
559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
560_mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
561{
562  return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
563						 (__v4df) __B,
564						 (__v4df)
565						 _mm256_setzero_pd (),
566						 (__mmask8) __U);
567}
568
569extern __inline __m128
570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
571_mm_mask_sub_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
572{
573  return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
574						(__v4sf) __B,
575						(__v4sf) __W,
576						(__mmask8) __U);
577}
578
579extern __inline __m128
580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
581_mm_maskz_sub_ps (__mmask16 __U, __m128 __A, __m128 __B)
582{
583  return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
584						(__v4sf) __B,
585						(__v4sf)
586						_mm_setzero_ps (),
587						(__mmask8) __U);
588}
589
590extern __inline __m256
591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
592_mm256_mask_sub_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
593{
594  return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
595						(__v8sf) __B,
596						(__v8sf) __W,
597						(__mmask8) __U);
598}
599
600extern __inline __m256
601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602_mm256_maskz_sub_ps (__mmask16 __U, __m256 __A, __m256 __B)
603{
604  return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
605						(__v8sf) __B,
606						(__v8sf)
607						_mm256_setzero_ps (),
608						(__mmask8) __U);
609}
610
611extern __inline void
612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613_mm256_store_epi64 (void *__P, __m256i __A)
614{
615  *(__m256i *) __P = __A;
616}
617
618extern __inline void
619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
620_mm_store_epi64 (void *__P, __m128i __A)
621{
622  *(__m128i *) __P = __A;
623}
624
625extern __inline __m256d
626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
627_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
628{
629  return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
630						   (__v4df) __W,
631						   (__mmask8) __U);
632}
633
634extern __inline __m256d
635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
636_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
637{
638  return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
639						   (__v4df)
640						   _mm256_setzero_pd (),
641						   (__mmask8) __U);
642}
643
644extern __inline __m128d
645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
646_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
647{
648  return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
649						   (__v2df) __W,
650						   (__mmask8) __U);
651}
652
653extern __inline __m128d
654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
655_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
656{
657  return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
658						   (__v2df)
659						   _mm_setzero_pd (),
660						   (__mmask8) __U);
661}
662
663extern __inline void
664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
665_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
666{
667  __builtin_ia32_storeupd256_mask ((__v4df *) __P,
668				   (__v4df) __A,
669				   (__mmask8) __U);
670}
671
672extern __inline void
673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
674_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
675{
676  __builtin_ia32_storeupd128_mask ((__v2df *) __P,
677				   (__v2df) __A,
678				   (__mmask8) __U);
679}
680
681extern __inline __m256
682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
684{
685  return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
686						  (__v8sf) __W,
687						  (__mmask8) __U);
688}
689
690extern __inline __m256
691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
692_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
693{
694  return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
695						  (__v8sf)
696						  _mm256_setzero_ps (),
697						  (__mmask8) __U);
698}
699
700extern __inline __m128
701__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
702_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
703{
704  return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
705						  (__v4sf) __W,
706						  (__mmask8) __U);
707}
708
709extern __inline __m128
710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
712{
713  return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
714						  (__v4sf)
715						  _mm_setzero_ps (),
716						  (__mmask8) __U);
717}
718
719extern __inline void
720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
721_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
722{
723  __builtin_ia32_storeups256_mask ((__v8sf *) __P,
724				   (__v8sf) __A,
725				   (__mmask8) __U);
726}
727
728extern __inline void
729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
730_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
731{
732  __builtin_ia32_storeups128_mask ((__v4sf *) __P,
733				   (__v4sf) __A,
734				   (__mmask8) __U);
735}
736
737extern __inline __m256i
738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
739_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
740{
741  return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
742						     (__v4di) __W,
743						     (__mmask8) __U);
744}
745
746extern __inline __m256i
747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
748_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
749{
750  return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
751						     (__v4di)
752						     _mm256_setzero_si256 (),
753						     (__mmask8) __U);
754}
755
756extern __inline __m128i
757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
758_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
759{
760  return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
761						     (__v2di) __W,
762						     (__mmask8) __U);
763}
764
765extern __inline __m128i
766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
767_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
768{
769  return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
770						     (__v2di)
771						     _mm_setzero_di (),
772						     (__mmask8) __U);
773}
774
775extern __inline void
776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
777_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
778{
779  __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
780				     (__v4di) __A,
781				     (__mmask8) __U);
782}
783
784extern __inline void
785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
787{
788  __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
789				     (__v2di) __A,
790				     (__mmask8) __U);
791}
792
793extern __inline __m256i
794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
795_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
796{
797  return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
798						     (__v8si) __W,
799						     (__mmask8) __U);
800}
801
802extern __inline __m256i
803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
804_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
805{
806  return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
807						     (__v8si)
808						     _mm256_setzero_si256 (),
809						     (__mmask8) __U);
810}
811
812extern __inline __m128i
813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
814_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
815{
816  return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
817						     (__v4si) __W,
818						     (__mmask8) __U);
819}
820
821extern __inline __m128i
822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
823_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
824{
825  return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
826						     (__v4si)
827						     _mm_setzero_si128 (),
828						     (__mmask8) __U);
829}
830
831extern __inline void
832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
833_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
834{
835  __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
836				     (__v8si) __A,
837				     (__mmask8) __U);
838}
839
840extern __inline void
841__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
842_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
843{
844  __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
845				     (__v4si) __A,
846				     (__mmask8) __U);
847}
848
849extern __inline __m256i
850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
851_mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
852{
853  return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
854						 (__v8si) __W,
855						 (__mmask8) __U);
856}
857
858extern __inline __m256i
859__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
860_mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
861{
862  return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
863						 (__v8si)
864						 _mm256_setzero_si256 (),
865						 (__mmask8) __U);
866}
867
868extern __inline __m128i
869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
870_mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
871{
872  return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
873						 (__v4si) __W,
874						 (__mmask8) __U);
875}
876
877extern __inline __m128i
878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
879_mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
880{
881  return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
882						 (__v4si)
883						 _mm_setzero_si128 (),
884						 (__mmask8) __U);
885}
886
887extern __inline __m256i
888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
889_mm256_abs_epi64 (__m256i __A)
890{
891  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
892						 (__v4di)
893						 _mm256_setzero_si256 (),
894						 (__mmask8) -1);
895}
896
897extern __inline __m256i
898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
899_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
900{
901  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
902						 (__v4di) __W,
903						 (__mmask8) __U);
904}
905
906extern __inline __m256i
907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
908_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
909{
910  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
911						 (__v4di)
912						 _mm256_setzero_si256 (),
913						 (__mmask8) __U);
914}
915
916extern __inline __m128i
917__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
918_mm_abs_epi64 (__m128i __A)
919{
920  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
921						 (__v2di)
922						 _mm_setzero_di (),
923						 (__mmask8) -1);
924}
925
926extern __inline __m128i
927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
928_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
929{
930  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
931						 (__v2di) __W,
932						 (__mmask8) __U);
933}
934
935extern __inline __m128i
936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
937_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
938{
939  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
940						 (__v2di)
941						 _mm_setzero_di (),
942						 (__mmask8) __U);
943}
944
945extern __inline __m128i
946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
947_mm256_cvtpd_epu32 (__m256d __A)
948{
949  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
950						     (__v4si)
951						     _mm_setzero_si128 (),
952						     (__mmask8) -1);
953}
954
955extern __inline __m128i
956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
957_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
958{
959  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
960						     (__v4si) __W,
961						     (__mmask8) __U);
962}
963
964extern __inline __m128i
965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
966_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
967{
968  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
969						     (__v4si)
970						     _mm_setzero_si128 (),
971						     (__mmask8) __U);
972}
973
974extern __inline __m128i
975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
976_mm_cvtpd_epu32 (__m128d __A)
977{
978  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
979						     (__v4si)
980						     _mm_setzero_si128 (),
981						     (__mmask8) -1);
982}
983
984extern __inline __m128i
985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
986_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
987{
988  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
989						     (__v4si) __W,
990						     (__mmask8) __U);
991}
992
993extern __inline __m128i
994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
995_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
996{
997  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
998						     (__v4si)
999						     _mm_setzero_si128 (),
1000						     (__mmask8) __U);
1001}
1002
1003extern __inline __m256i
1004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1005_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
1006{
1007  return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1008						     (__v8si) __W,
1009						     (__mmask8) __U);
1010}
1011
1012extern __inline __m256i
1013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1014_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
1015{
1016  return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1017						     (__v8si)
1018						     _mm256_setzero_si256 (),
1019						     (__mmask8) __U);
1020}
1021
1022extern __inline __m128i
1023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1024_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
1025{
1026  return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1027						     (__v4si) __W,
1028						     (__mmask8) __U);
1029}
1030
1031extern __inline __m128i
1032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1033_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
1034{
1035  return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1036						     (__v4si)
1037						     _mm_setzero_si128 (),
1038						     (__mmask8) __U);
1039}
1040
1041extern __inline __m256i
1042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1043_mm256_cvttps_epu32 (__m256 __A)
1044{
1045  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1046						      (__v8si)
1047						      _mm256_setzero_si256 (),
1048						      (__mmask8) -1);
1049}
1050
1051extern __inline __m256i
1052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1053_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
1054{
1055  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1056						      (__v8si) __W,
1057						      (__mmask8) __U);
1058}
1059
1060extern __inline __m256i
1061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1062_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
1063{
1064  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1065						      (__v8si)
1066						      _mm256_setzero_si256 (),
1067						      (__mmask8) __U);
1068}
1069
1070extern __inline __m128i
1071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1072_mm_cvttps_epu32 (__m128 __A)
1073{
1074  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1075						      (__v4si)
1076						      _mm_setzero_si128 (),
1077						      (__mmask8) -1);
1078}
1079
1080extern __inline __m128i
1081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
1083{
1084  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1085						      (__v4si) __W,
1086						      (__mmask8) __U);
1087}
1088
1089extern __inline __m128i
1090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1091_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
1092{
1093  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1094						      (__v4si)
1095						      _mm_setzero_si128 (),
1096						      (__mmask8) __U);
1097}
1098
1099extern __inline __m128i
1100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1101_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1102{
1103  return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1104						     (__v4si) __W,
1105						     (__mmask8) __U);
1106}
1107
1108extern __inline __m128i
1109__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1110_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
1111{
1112  return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1113						     (__v4si)
1114						     _mm_setzero_si128 (),
1115						     (__mmask8) __U);
1116}
1117
1118extern __inline __m128i
1119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1120_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1121{
1122  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1123						     (__v4si) __W,
1124						     (__mmask8) __U);
1125}
1126
1127extern __inline __m128i
1128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1129_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
1130{
1131  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1132						     (__v4si)
1133						     _mm_setzero_si128 (),
1134						     (__mmask8) __U);
1135}
1136
1137extern __inline __m128i
1138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1139_mm256_cvttpd_epu32 (__m256d __A)
1140{
1141  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1142						      (__v4si)
1143						      _mm_setzero_si128 (),
1144						      (__mmask8) -1);
1145}
1146
1147extern __inline __m128i
1148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1149_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
1150{
1151  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1152						      (__v4si) __W,
1153						      (__mmask8) __U);
1154}
1155
1156extern __inline __m128i
1157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1158_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
1159{
1160  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1161						      (__v4si)
1162						      _mm_setzero_si128 (),
1163						      (__mmask8) __U);
1164}
1165
1166extern __inline __m128i
1167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1168_mm_cvttpd_epu32 (__m128d __A)
1169{
1170  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1171						      (__v4si)
1172						      _mm_setzero_si128 (),
1173						      (__mmask8) -1);
1174}
1175
1176extern __inline __m128i
1177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1178_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
1179{
1180  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1181						      (__v4si) __W,
1182						      (__mmask8) __U);
1183}
1184
1185extern __inline __m128i
1186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1187_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
1188{
1189  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1190						      (__v4si)
1191						      _mm_setzero_si128 (),
1192						      (__mmask8) __U);
1193}
1194
1195extern __inline __m128i
1196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1197_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1198{
1199  return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1200						    (__v4si) __W,
1201						    (__mmask8) __U);
1202}
1203
1204extern __inline __m128i
1205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1206_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
1207{
1208  return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1209						    (__v4si)
1210						    _mm_setzero_si128 (),
1211						    (__mmask8) __U);
1212}
1213
1214extern __inline __m128i
1215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1216_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1217{
1218  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1219						    (__v4si) __W,
1220						    (__mmask8) __U);
1221}
1222
1223extern __inline __m128i
1224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1225_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
1226{
1227  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1228						    (__v4si)
1229						    _mm_setzero_si128 (),
1230						    (__mmask8) __U);
1231}
1232
1233extern __inline __m256d
1234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1235_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1236{
1237  return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1238						    (__v4df) __W,
1239						    (__mmask8) __U);
1240}
1241
1242extern __inline __m256d
1243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1244_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1245{
1246  return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1247						    (__v4df)
1248						    _mm256_setzero_pd (),
1249						    (__mmask8) __U);
1250}
1251
1252extern __inline __m128d
1253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1254_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1255{
1256  return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1257						    (__v2df) __W,
1258						    (__mmask8) __U);
1259}
1260
1261extern __inline __m128d
1262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1263_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1264{
1265  return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1266						    (__v2df)
1267						    _mm_setzero_pd (),
1268						    (__mmask8) __U);
1269}
1270
1271extern __inline __m256d
1272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1273_mm256_cvtepu32_pd (__m128i __A)
1274{
1275  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1276						     (__v4df)
1277						     _mm256_setzero_pd (),
1278						     (__mmask8) -1);
1279}
1280
1281extern __inline __m256d
1282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1283_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1284{
1285  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1286						     (__v4df) __W,
1287						     (__mmask8) __U);
1288}
1289
1290extern __inline __m256d
1291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1292_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1293{
1294  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1295						     (__v4df)
1296						     _mm256_setzero_pd (),
1297						     (__mmask8) __U);
1298}
1299
1300extern __inline __m128d
1301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1302_mm_cvtepu32_pd (__m128i __A)
1303{
1304  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1305						     (__v2df)
1306						     _mm_setzero_pd (),
1307						     (__mmask8) -1);
1308}
1309
1310extern __inline __m128d
1311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1312_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1313{
1314  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1315						     (__v2df) __W,
1316						     (__mmask8) __U);
1317}
1318
1319extern __inline __m128d
1320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1321_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1322{
1323  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1324						     (__v2df)
1325						     _mm_setzero_pd (),
1326						     (__mmask8) __U);
1327}
1328
1329extern __inline __m256
1330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1331_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1332{
1333  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1334						   (__v8sf) __W,
1335						   (__mmask8) __U);
1336}
1337
1338extern __inline __m256
1339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1340_mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A)
1341{
1342  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1343						   (__v8sf)
1344						   _mm256_setzero_ps (),
1345						   (__mmask8) __U);
1346}
1347
1348extern __inline __m128
1349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1350_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1351{
1352  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1353						   (__v4sf) __W,
1354						   (__mmask8) __U);
1355}
1356
1357extern __inline __m128
1358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1359_mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A)
1360{
1361  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1362						   (__v4sf)
1363						   _mm_setzero_ps (),
1364						   (__mmask8) __U);
1365}
1366
1367extern __inline __m256
1368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1369_mm256_cvtepu32_ps (__m256i __A)
1370{
1371  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1372						    (__v8sf)
1373						    _mm256_setzero_ps (),
1374						    (__mmask8) -1);
1375}
1376
1377extern __inline __m256
1378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1379_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1380{
1381  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1382						    (__v8sf) __W,
1383						    (__mmask8) __U);
1384}
1385
1386extern __inline __m256
1387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1388_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
1389{
1390  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1391						    (__v8sf)
1392						    _mm256_setzero_ps (),
1393						    (__mmask8) __U);
1394}
1395
1396extern __inline __m128
1397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1398_mm_cvtepu32_ps (__m128i __A)
1399{
1400  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1401						    (__v4sf)
1402						    _mm_setzero_ps (),
1403						    (__mmask8) -1);
1404}
1405
1406extern __inline __m128
1407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1408_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1409{
1410  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1411						    (__v4sf) __W,
1412						    (__mmask8) __U);
1413}
1414
1415extern __inline __m128
1416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1417_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
1418{
1419  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1420						    (__v4sf)
1421						    _mm_setzero_ps (),
1422						    (__mmask8) __U);
1423}
1424
1425extern __inline __m256d
1426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1427_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
1428{
1429  return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1430						    (__v4df) __W,
1431						    (__mmask8) __U);
1432}
1433
1434extern __inline __m256d
1435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1436_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1437{
1438  return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1439						    (__v4df)
1440						    _mm256_setzero_pd (),
1441						    (__mmask8) __U);
1442}
1443
1444extern __inline __m128d
1445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1446_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
1447{
1448  return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1449						    (__v2df) __W,
1450						    (__mmask8) __U);
1451}
1452
1453extern __inline __m128d
1454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1455_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1456{
1457  return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1458						    (__v2df)
1459						    _mm_setzero_pd (),
1460						    (__mmask8) __U);
1461}
1462
1463extern __inline __m128i
1464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1465_mm_cvtepi32_epi8 (__m128i __A)
1466{
1467  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1468						  (__v16qi)_mm_undefined_si128(),
1469						  (__mmask8) -1);
1470}
1471
1472extern __inline void
1473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1474_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1475{
1476  __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1477}
1478
1479extern __inline __m128i
1480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1481_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1482{
1483  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1484						  (__v16qi) __O, __M);
1485}
1486
1487extern __inline __m128i
1488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1489_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
1490{
1491  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1492						  (__v16qi)
1493						  _mm_setzero_si128 (),
1494						  __M);
1495}
1496
1497extern __inline __m128i
1498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1499_mm256_cvtepi32_epi8 (__m256i __A)
1500{
1501  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1502						  (__v16qi)_mm_undefined_si128(),
1503						  (__mmask8) -1);
1504}
1505
1506extern __inline __m128i
1507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1508_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1509{
1510  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1511						  (__v16qi) __O, __M);
1512}
1513
1514extern __inline void
1515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1516_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1517{
1518  __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1519}
1520
1521extern __inline __m128i
1522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1523_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
1524{
1525  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1526						  (__v16qi)
1527						  _mm_setzero_si128 (),
1528						  __M);
1529}
1530
1531extern __inline __m128i
1532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1533_mm_cvtsepi32_epi8 (__m128i __A)
1534{
1535  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1536						   (__v16qi)_mm_undefined_si128(),
1537						   (__mmask8) -1);
1538}
1539
1540extern __inline void
1541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1542_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1543{
1544  __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1545}
1546
1547extern __inline __m128i
1548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1549_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1550{
1551  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1552						   (__v16qi) __O, __M);
1553}
1554
1555extern __inline __m128i
1556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1557_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
1558{
1559  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1560						   (__v16qi)
1561						   _mm_setzero_si128 (),
1562						   __M);
1563}
1564
1565extern __inline __m128i
1566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1567_mm256_cvtsepi32_epi8 (__m256i __A)
1568{
1569  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1570						   (__v16qi)_mm_undefined_si128(),
1571						   (__mmask8) -1);
1572}
1573
1574extern __inline void
1575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1576_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1577{
1578  __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1579}
1580
1581extern __inline __m128i
1582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1583_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1584{
1585  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1586						   (__v16qi) __O, __M);
1587}
1588
1589extern __inline __m128i
1590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1591_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
1592{
1593  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1594						   (__v16qi)
1595						   _mm_setzero_si128 (),
1596						   __M);
1597}
1598
1599extern __inline __m128i
1600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1601_mm_cvtusepi32_epi8 (__m128i __A)
1602{
1603  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1604						    (__v16qi)_mm_undefined_si128(),
1605						    (__mmask8) -1);
1606}
1607
1608extern __inline void
1609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1610_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1611{
1612  __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1613}
1614
1615extern __inline __m128i
1616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1617_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1618{
1619  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1620						    (__v16qi) __O,
1621						    __M);
1622}
1623
1624extern __inline __m128i
1625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1626_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
1627{
1628  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1629						    (__v16qi)
1630						    _mm_setzero_si128 (),
1631						    __M);
1632}
1633
1634extern __inline __m128i
1635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1636_mm256_cvtusepi32_epi8 (__m256i __A)
1637{
1638  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1639						    (__v16qi)_mm_undefined_si128(),
1640						    (__mmask8) -1);
1641}
1642
1643extern __inline void
1644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1645_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1646{
1647  __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
1648}
1649
1650extern __inline __m128i
1651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1652_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1653{
1654  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1655						    (__v16qi) __O,
1656						    __M);
1657}
1658
1659extern __inline __m128i
1660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1661_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
1662{
1663  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1664						    (__v16qi)
1665						    _mm_setzero_si128 (),
1666						    __M);
1667}
1668
1669extern __inline __m128i
1670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1671_mm_cvtepi32_epi16 (__m128i __A)
1672{
1673  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1674						  (__v8hi) _mm_setzero_si128 (),
1675						  (__mmask8) -1);
1676}
1677
1678extern __inline void
1679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1680_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1681{
1682  __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1683}
1684
1685extern __inline __m128i
1686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1687_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1688{
1689  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1690						  (__v8hi) __O, __M);
1691}
1692
1693extern __inline __m128i
1694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1695_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
1696{
1697  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1698						  (__v8hi)
1699						  _mm_setzero_si128 (),
1700						  __M);
1701}
1702
1703extern __inline __m128i
1704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705_mm256_cvtepi32_epi16 (__m256i __A)
1706{
1707  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1708						  (__v8hi)_mm_setzero_si128 (),
1709						  (__mmask8) -1);
1710}
1711
1712extern __inline void
1713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1714_mm256_mask_cvtepi32_storeu_epi16 (void *  __P, __mmask8 __M, __m256i __A)
1715{
1716  __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1717}
1718
1719extern __inline __m128i
1720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1721_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1722{
1723  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1724						  (__v8hi) __O, __M);
1725}
1726
1727extern __inline __m128i
1728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1729_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
1730{
1731  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1732						  (__v8hi)
1733						  _mm_setzero_si128 (),
1734						  __M);
1735}
1736
1737extern __inline __m128i
1738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1739_mm_cvtsepi32_epi16 (__m128i __A)
1740{
1741  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1742						   (__v8hi)_mm_setzero_si128 (),
1743						   (__mmask8) -1);
1744}
1745
1746extern __inline void
1747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1748_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1749{
1750  __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1751}
1752
1753extern __inline __m128i
1754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1755_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1756{
1757  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1758						   (__v8hi)__O,
1759						   __M);
1760}
1761
1762extern __inline __m128i
1763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1764_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
1765{
1766  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1767						   (__v8hi)
1768						   _mm_setzero_si128 (),
1769						   __M);
1770}
1771
1772extern __inline __m128i
1773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1774_mm256_cvtsepi32_epi16 (__m256i __A)
1775{
1776  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1777						   (__v8hi)_mm_undefined_si128(),
1778						   (__mmask8) -1);
1779}
1780
1781extern __inline void
1782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1783_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1784{
1785  __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1786}
1787
1788extern __inline __m128i
1789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1790_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1791{
1792  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1793						   (__v8hi) __O, __M);
1794}
1795
1796extern __inline __m128i
1797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1798_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
1799{
1800  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1801						   (__v8hi)
1802						   _mm_setzero_si128 (),
1803						   __M);
1804}
1805
1806extern __inline __m128i
1807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1808_mm_cvtusepi32_epi16 (__m128i __A)
1809{
1810  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1811						    (__v8hi)_mm_undefined_si128(),
1812						    (__mmask8) -1);
1813}
1814
1815extern __inline void
1816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1817_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1818{
1819  __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1820}
1821
1822extern __inline __m128i
1823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1824_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1825{
1826  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1827						    (__v8hi) __O, __M);
1828}
1829
1830extern __inline __m128i
1831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1832_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
1833{
1834  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1835						    (__v8hi)
1836						    _mm_setzero_si128 (),
1837						    __M);
1838}
1839
1840extern __inline __m128i
1841__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1842_mm256_cvtusepi32_epi16 (__m256i __A)
1843{
1844  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1845						    (__v8hi)_mm_undefined_si128(),
1846						    (__mmask8) -1);
1847}
1848
1849extern __inline void
1850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1851_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1852{
1853  __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1854}
1855
1856extern __inline __m128i
1857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1858_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1859{
1860  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1861						    (__v8hi) __O, __M);
1862}
1863
1864extern __inline __m128i
1865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1866_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
1867{
1868  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1869						    (__v8hi)
1870						    _mm_setzero_si128 (),
1871						    __M);
1872}
1873
1874extern __inline __m128i
1875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1876_mm_cvtepi64_epi8 (__m128i __A)
1877{
1878  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1879						  (__v16qi)_mm_undefined_si128(),
1880						  (__mmask8) -1);
1881}
1882
1883extern __inline void
1884__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1885_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1886{
1887  __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1888}
1889
1890extern __inline __m128i
1891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1892_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1893{
1894  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1895						  (__v16qi) __O, __M);
1896}
1897
1898extern __inline __m128i
1899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1900_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
1901{
1902  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1903						  (__v16qi)
1904						  _mm_setzero_si128 (),
1905						  __M);
1906}
1907
1908extern __inline __m128i
1909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1910_mm256_cvtepi64_epi8 (__m256i __A)
1911{
1912  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1913						  (__v16qi)_mm_undefined_si128(),
1914						  (__mmask8) -1);
1915}
1916
1917extern __inline void
1918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1919_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1920{
1921  __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1922}
1923
1924extern __inline __m128i
1925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1926_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1927{
1928  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1929						  (__v16qi) __O, __M);
1930}
1931
1932extern __inline __m128i
1933__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1934_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
1935{
1936  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1937						  (__v16qi)
1938						  _mm_setzero_si128 (),
1939						  __M);
1940}
1941
1942extern __inline __m128i
1943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1944_mm_cvtsepi64_epi8 (__m128i __A)
1945{
1946  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1947						   (__v16qi)_mm_undefined_si128(),
1948						   (__mmask8) -1);
1949}
1950
1951extern __inline void
1952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1953_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1954{
1955  __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1956}
1957
1958extern __inline __m128i
1959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1960_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1961{
1962  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1963						   (__v16qi) __O, __M);
1964}
1965
1966extern __inline __m128i
1967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1968_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
1969{
1970  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1971						   (__v16qi)
1972						   _mm_setzero_si128 (),
1973						   __M);
1974}
1975
1976extern __inline __m128i
1977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1978_mm256_cvtsepi64_epi8 (__m256i __A)
1979{
1980  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1981						   (__v16qi)_mm_undefined_si128(),
1982						   (__mmask8) -1);
1983}
1984
1985extern __inline void
1986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1987_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1988{
1989  __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1990}
1991
1992extern __inline __m128i
1993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1994_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1995{
1996  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1997						   (__v16qi) __O, __M);
1998}
1999
2000extern __inline __m128i
2001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2002_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
2003{
2004  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2005						   (__v16qi)
2006						   _mm_setzero_si128 (),
2007						   __M);
2008}
2009
2010extern __inline __m128i
2011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2012_mm_cvtusepi64_epi8 (__m128i __A)
2013{
2014  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2015						    (__v16qi)_mm_undefined_si128(),
2016						    (__mmask8) -1);
2017}
2018
2019extern __inline void
2020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2021_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
2022{
2023  __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
2024}
2025
2026extern __inline __m128i
2027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
2029{
2030  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2031						    (__v16qi) __O,
2032						    __M);
2033}
2034
2035extern __inline __m128i
2036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2037_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
2038{
2039  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2040						    (__v16qi)
2041						    _mm_setzero_si128 (),
2042						    __M);
2043}
2044
2045extern __inline __m128i
2046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047_mm256_cvtusepi64_epi8 (__m256i __A)
2048{
2049  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2050						    (__v16qi)_mm_undefined_si128(),
2051						    (__mmask8) -1);
2052}
2053
2054extern __inline void
2055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2056_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2057{
2058  __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
2059}
2060
2061extern __inline __m128i
2062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2063_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2064{
2065  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2066						    (__v16qi) __O,
2067						    __M);
2068}
2069
2070extern __inline __m128i
2071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2072_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
2073{
2074  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2075						    (__v16qi)
2076						    _mm_setzero_si128 (),
2077						    __M);
2078}
2079
2080extern __inline __m128i
2081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2082_mm_cvtepi64_epi16 (__m128i __A)
2083{
2084  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2085						  (__v8hi)_mm_undefined_si128(),
2086						  (__mmask8) -1);
2087}
2088
2089extern __inline void
2090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2091_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2092{
2093  __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2094}
2095
2096extern __inline __m128i
2097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2098_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2099{
2100  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2101						  (__v8hi)__O,
2102						  __M);
2103}
2104
2105extern __inline __m128i
2106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2107_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
2108{
2109  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2110						  (__v8hi)
2111						  _mm_setzero_si128 (),
2112						  __M);
2113}
2114
2115extern __inline __m128i
2116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2117_mm256_cvtepi64_epi16 (__m256i __A)
2118{
2119  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2120						  (__v8hi)_mm_undefined_si128(),
2121						  (__mmask8) -1);
2122}
2123
2124extern __inline void
2125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2126_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2127{
2128  __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2129}
2130
2131extern __inline __m128i
2132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2133_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2134{
2135  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2136						  (__v8hi) __O, __M);
2137}
2138
2139extern __inline __m128i
2140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2141_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
2142{
2143  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2144						  (__v8hi)
2145						  _mm_setzero_si128 (),
2146						  __M);
2147}
2148
2149extern __inline __m128i
2150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2151_mm_cvtsepi64_epi16 (__m128i __A)
2152{
2153  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2154						   (__v8hi)_mm_undefined_si128(),
2155						   (__mmask8) -1);
2156}
2157
2158extern __inline void
2159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2160_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2161{
2162  __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2163}
2164
2165extern __inline __m128i
2166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2167_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2168{
2169  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2170						   (__v8hi) __O, __M);
2171}
2172
2173extern __inline __m128i
2174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2175_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
2176{
2177  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2178						   (__v8hi)
2179						   _mm_setzero_si128 (),
2180						   __M);
2181}
2182
2183extern __inline __m128i
2184__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2185_mm256_cvtsepi64_epi16 (__m256i __A)
2186{
2187  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2188						   (__v8hi)_mm_undefined_si128(),
2189						   (__mmask8) -1);
2190}
2191
2192extern __inline void
2193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2194_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2195{
2196  __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2197}
2198
2199extern __inline __m128i
2200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2201_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2202{
2203  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2204						   (__v8hi) __O, __M);
2205}
2206
2207extern __inline __m128i
2208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2209_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
2210{
2211  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2212						   (__v8hi)
2213						   _mm_setzero_si128 (),
2214						   __M);
2215}
2216
2217extern __inline __m128i
2218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2219_mm_cvtusepi64_epi16 (__m128i __A)
2220{
2221  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2222						    (__v8hi)_mm_undefined_si128(),
2223						    (__mmask8) -1);
2224}
2225
2226extern __inline void
2227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2228_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2229{
2230  __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2231}
2232
2233extern __inline __m128i
2234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2235_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2236{
2237  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2238						    (__v8hi) __O, __M);
2239}
2240
2241extern __inline __m128i
2242__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2243_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
2244{
2245  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2246						    (__v8hi)
2247						    _mm_setzero_si128 (),
2248						    __M);
2249}
2250
2251extern __inline __m128i
2252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2253_mm256_cvtusepi64_epi16 (__m256i __A)
2254{
2255  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2256						    (__v8hi)_mm_undefined_si128(),
2257						    (__mmask8) -1);
2258}
2259
2260extern __inline void
2261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2262_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2263{
2264  __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2265}
2266
2267extern __inline __m128i
2268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2269_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2270{
2271  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2272						    (__v8hi) __O, __M);
2273}
2274
2275extern __inline __m128i
2276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2277_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
2278{
2279  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2280						    (__v8hi)
2281						    _mm_setzero_si128 (),
2282						    __M);
2283}
2284
2285extern __inline __m128i
2286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2287_mm_cvtepi64_epi32 (__m128i __A)
2288{
2289  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2290						  (__v4si)_mm_undefined_si128(),
2291						  (__mmask8) -1);
2292}
2293
2294extern __inline void
2295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2296_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2297{
2298  __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2299}
2300
2301extern __inline __m128i
2302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2303_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2304{
2305  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2306						  (__v4si) __O, __M);
2307}
2308
2309extern __inline __m128i
2310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2311_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
2312{
2313  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2314						  (__v4si)
2315						  _mm_setzero_si128 (),
2316						  __M);
2317}
2318
2319extern __inline __m128i
2320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2321_mm256_cvtepi64_epi32 (__m256i __A)
2322{
2323  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2324						  (__v4si)_mm_undefined_si128(),
2325						  (__mmask8) -1);
2326}
2327
2328extern __inline void
2329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2330_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2331{
2332  __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2333}
2334
2335extern __inline __m128i
2336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2337_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2338{
2339  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2340						  (__v4si) __O, __M);
2341}
2342
2343extern __inline __m128i
2344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2345_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
2346{
2347  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2348						  (__v4si)
2349						  _mm_setzero_si128 (),
2350						  __M);
2351}
2352
2353extern __inline __m128i
2354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2355_mm_cvtsepi64_epi32 (__m128i __A)
2356{
2357  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2358						   (__v4si)_mm_undefined_si128(),
2359						   (__mmask8) -1);
2360}
2361
2362extern __inline void
2363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2364_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2365{
2366  __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2367}
2368
2369extern __inline __m128i
2370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2371_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2372{
2373  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2374						   (__v4si) __O, __M);
2375}
2376
2377extern __inline __m128i
2378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2379_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
2380{
2381  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2382						   (__v4si)
2383						   _mm_setzero_si128 (),
2384						   __M);
2385}
2386
2387extern __inline __m128i
2388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2389_mm256_cvtsepi64_epi32 (__m256i __A)
2390{
2391  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2392						   (__v4si)_mm_undefined_si128(),
2393						   (__mmask8) -1);
2394}
2395
2396extern __inline void
2397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2398_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2399{
2400  __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2401}
2402
2403extern __inline __m128i
2404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2405_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2406{
2407  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2408						   (__v4si)__O,
2409						   __M);
2410}
2411
2412extern __inline __m128i
2413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2414_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
2415{
2416  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2417						   (__v4si)
2418						   _mm_setzero_si128 (),
2419						   __M);
2420}
2421
2422extern __inline __m128i
2423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2424_mm_cvtusepi64_epi32 (__m128i __A)
2425{
2426  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2427						    (__v4si)_mm_undefined_si128(),
2428						    (__mmask8) -1);
2429}
2430
2431extern __inline void
2432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2433_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2434{
2435  __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2436}
2437
2438extern __inline __m128i
2439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2440_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2441{
2442  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2443						    (__v4si) __O, __M);
2444}
2445
2446extern __inline __m128i
2447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2448_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
2449{
2450  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2451						    (__v4si)
2452						    _mm_setzero_si128 (),
2453						    __M);
2454}
2455
2456extern __inline __m128i
2457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2458_mm256_cvtusepi64_epi32 (__m256i __A)
2459{
2460  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2461						    (__v4si)_mm_undefined_si128(),
2462						    (__mmask8) -1);
2463}
2464
2465extern __inline void
2466__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2467_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2468{
2469  __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2470}
2471
2472extern __inline __m128i
2473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2474_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2475{
2476  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2477						    (__v4si) __O, __M);
2478}
2479
2480extern __inline __m128i
2481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2482_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
2483{
2484  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2485						    (__v4si)
2486						    _mm_setzero_si128 (),
2487						    __M);
2488}
2489
2490extern __inline __m256
2491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2492_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
2493{
2494  return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2495						      (__v8sf) __O,
2496						      __M);
2497}
2498
2499extern __inline __m256
2500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2501_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2502{
2503  return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2504						      (__v8sf)
2505						      _mm256_setzero_ps (),
2506						      __M);
2507}
2508
2509extern __inline __m128
2510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2511_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
2512{
2513  return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2514						      (__v4sf) __O,
2515						      __M);
2516}
2517
2518extern __inline __m128
2519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2520_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2521{
2522  return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2523						      (__v4sf)
2524						      _mm_setzero_ps (),
2525						      __M);
2526}
2527
2528extern __inline __m256d
2529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2530_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
2531{
2532  return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2533						       (__v4df) __O,
2534						       __M);
2535}
2536
2537extern __inline __m256d
2538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2539_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
2540{
2541  return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2542						       (__v4df)
2543						       _mm256_setzero_pd (),
2544						       __M);
2545}
2546
2547extern __inline __m256i
2548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2549_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
2550{
2551  return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2552						       (__v8si) __O,
2553						       __M);
2554}
2555
2556extern __inline __m256i
2557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2558_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2559{
2560  return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2561						       (__v8si)
2562						       _mm256_setzero_si256 (),
2563						       __M);
2564}
2565
2566extern __inline __m256i
2567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2568_mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
2569{
2570  return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
2571							   __M);
2572}
2573
2574extern __inline __m256i
2575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2576_mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
2577{
2578  return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
2579							   (__v8si)
2580							   _mm256_setzero_si256 (),
2581							   __M);
2582}
2583
2584extern __inline __m128i
2585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2586_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2587{
2588  return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2589						       (__v4si) __O,
2590						       __M);
2591}
2592
2593extern __inline __m128i
2594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2595_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2596{
2597  return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2598						       (__v4si)
2599						       _mm_setzero_si128 (),
2600						       __M);
2601}
2602
2603extern __inline __m128i
2604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2605_mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
2606{
2607  return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
2608							   __M);
2609}
2610
2611extern __inline __m128i
2612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2613_mm_maskz_set1_epi32 (__mmask8 __M, int __A)
2614{
2615  return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A,
2616							   (__v4si)
2617							   _mm_setzero_si128 (),
2618							   __M);
2619}
2620
2621extern __inline __m256i
2622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2623_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
2624{
2625  return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2626						       (__v4di) __O,
2627						       __M);
2628}
2629
2630extern __inline __m256i
2631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2632_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2633{
2634  return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2635						       (__v4di)
2636						       _mm256_setzero_si256 (),
2637						       __M);
2638}
2639
2640extern __inline __m256i
2641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2642_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
2643{
2644  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
2645							   __M);
2646}
2647
2648extern __inline __m256i
2649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2650_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
2651{
2652  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
2653							   (__v4di)
2654							   _mm256_setzero_si256 (),
2655							   __M);
2656}
2657
2658extern __inline __m128i
2659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2660_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
2661{
2662  return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2663						       (__v2di) __O,
2664						       __M);
2665}
2666
2667extern __inline __m128i
2668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2669_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2670{
2671  return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2672						       (__v2di)
2673						       _mm_setzero_si128 (),
2674						       __M);
2675}
2676
2677extern __inline __m128i
2678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2679_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
2680{
2681  return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
2682							   __M);
2683}
2684
2685extern __inline __m128i
2686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2687_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
2688{
2689  return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
2690							   (__v2di)
2691							   _mm_setzero_si128 (),
2692							   __M);
2693}
2694
2695extern __inline __m256
2696__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2697_mm256_broadcast_f32x4 (__m128 __A)
2698{
2699  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2700						          (__v8sf)_mm256_undefined_pd (),
2701							  (__mmask8) -
2702							  1);
2703}
2704
2705extern __inline __m256
2706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2707_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
2708{
2709  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2710							  (__v8sf) __O,
2711							  __M);
2712}
2713
2714extern __inline __m256
2715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2716_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
2717{
2718  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2719							  (__v8sf)
2720							  _mm256_setzero_ps (),
2721							  __M);
2722}
2723
2724extern __inline __m256i
2725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2726_mm256_broadcast_i32x4 (__m128i __A)
2727{
2728  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2729							   __A,
2730						           (__v8si)_mm256_undefined_si256 (),
2731							   (__mmask8) -
2732							   1);
2733}
2734
2735extern __inline __m256i
2736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2737_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
2738{
2739  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2740							   __A,
2741							   (__v8si)
2742							   __O, __M);
2743}
2744
2745extern __inline __m256i
2746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2747_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
2748{
2749  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2750							   __A,
2751							   (__v8si)
2752							   _mm256_setzero_si256 (),
2753							   __M);
2754}
2755
2756extern __inline __m256i
2757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2758_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2759{
2760  return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2761						    (__v8si) __W,
2762						    (__mmask8) __U);
2763}
2764
2765extern __inline __m256i
2766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2767_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2768{
2769  return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2770						    (__v8si)
2771						    _mm256_setzero_si256 (),
2772						    (__mmask8) __U);
2773}
2774
2775extern __inline __m128i
2776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2777_mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2778{
2779  return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2780						    (__v4si) __W,
2781						    (__mmask8) __U);
2782}
2783
2784extern __inline __m128i
2785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2786_mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2787{
2788  return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2789						    (__v4si)
2790						    _mm_setzero_si128 (),
2791						    (__mmask8) __U);
2792}
2793
2794extern __inline __m256i
2795__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2796_mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2797{
2798  return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2799						    (__v4di) __W,
2800						    (__mmask8) __U);
2801}
2802
2803extern __inline __m256i
2804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2805_mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2806{
2807  return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2808						    (__v4di)
2809						    _mm256_setzero_si256 (),
2810						    (__mmask8) __U);
2811}
2812
2813extern __inline __m128i
2814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2815_mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2816{
2817  return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2818						    (__v2di) __W,
2819						    (__mmask8) __U);
2820}
2821
2822extern __inline __m128i
2823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2824_mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2825{
2826  return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2827						    (__v2di)
2828						    _mm_setzero_si128 (),
2829						    (__mmask8) __U);
2830}
2831
2832extern __inline __m256i
2833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2834_mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2835{
2836  return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2837						    (__v8si) __W,
2838						    (__mmask8) __U);
2839}
2840
2841extern __inline __m256i
2842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2843_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2844{
2845  return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2846						    (__v8si)
2847						    _mm256_setzero_si256 (),
2848						    (__mmask8) __U);
2849}
2850
2851extern __inline __m128i
2852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2853_mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2854{
2855  return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2856						    (__v4si) __W,
2857						    (__mmask8) __U);
2858}
2859
2860extern __inline __m128i
2861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2862_mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2863{
2864  return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2865						    (__v4si)
2866						    _mm_setzero_si128 (),
2867						    (__mmask8) __U);
2868}
2869
2870extern __inline __m256i
2871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2872_mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2873{
2874  return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2875						    (__v4di) __W,
2876						    (__mmask8) __U);
2877}
2878
2879extern __inline __m256i
2880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2881_mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2882{
2883  return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2884						    (__v4di)
2885						    _mm256_setzero_si256 (),
2886						    (__mmask8) __U);
2887}
2888
2889extern __inline __m128i
2890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2891_mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2892{
2893  return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2894						    (__v2di) __W,
2895						    (__mmask8) __U);
2896}
2897
2898extern __inline __m128i
2899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2900_mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2901{
2902  return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2903						    (__v2di)
2904						    _mm_setzero_si128 (),
2905						    (__mmask8) __U);
2906}
2907
2908extern __inline __m256i
2909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2910_mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
2911{
2912  return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2913						    (__v4di) __W,
2914						    (__mmask8) __U);
2915}
2916
2917extern __inline __m256i
2918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2919_mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2920{
2921  return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2922						    (__v4di)
2923						    _mm256_setzero_si256 (),
2924						    (__mmask8) __U);
2925}
2926
2927extern __inline __m128i
2928__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2929_mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
2930{
2931  return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2932						    (__v2di) __W,
2933						    (__mmask8) __U);
2934}
2935
2936extern __inline __m128i
2937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2938_mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2939{
2940  return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2941						    (__v2di)
2942						    _mm_setzero_si128 (),
2943						    (__mmask8) __U);
2944}
2945
2946extern __inline __m256i
2947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2948_mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2949{
2950  return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2951						    (__v8si) __W,
2952						    (__mmask8) __U);
2953}
2954
2955extern __inline __m256i
2956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2957_mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2958{
2959  return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2960						    (__v8si)
2961						    _mm256_setzero_si256 (),
2962						    (__mmask8) __U);
2963}
2964
2965extern __inline __m128i
2966__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2967_mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2968{
2969  return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2970						    (__v4si) __W,
2971						    (__mmask8) __U);
2972}
2973
2974extern __inline __m128i
2975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2976_mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2977{
2978  return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2979						    (__v4si)
2980						    _mm_setzero_si128 (),
2981						    (__mmask8) __U);
2982}
2983
2984extern __inline __m256i
2985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2986_mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2987{
2988  return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
2989						    (__v4di) __W,
2990						    (__mmask8) __U);
2991}
2992
2993extern __inline __m256i
2994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2995_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
2996{
2997  return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
2998						    (__v4di)
2999						    _mm256_setzero_si256 (),
3000						    (__mmask8) __U);
3001}
3002
3003extern __inline __m128i
3004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3005_mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3006{
3007  return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3008						    (__v2di) __W,
3009						    (__mmask8) __U);
3010}
3011
3012extern __inline __m128i
3013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3014_mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3015{
3016  return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3017						    (__v2di)
3018						    _mm_setzero_si128 (),
3019						    (__mmask8) __U);
3020}
3021
3022extern __inline __m256i
3023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3024_mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3025{
3026  return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3027						    (__v8si) __W,
3028						    (__mmask8) __U);
3029}
3030
3031extern __inline __m256i
3032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3033_mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3034{
3035  return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3036						    (__v8si)
3037						    _mm256_setzero_si256 (),
3038						    (__mmask8) __U);
3039}
3040
3041extern __inline __m128i
3042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3043_mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3044{
3045  return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3046						    (__v4si) __W,
3047						    (__mmask8) __U);
3048}
3049
3050extern __inline __m128i
3051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3052_mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3053{
3054  return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3055						    (__v4si)
3056						    _mm_setzero_si128 (),
3057						    (__mmask8) __U);
3058}
3059
3060extern __inline __m256i
3061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3062_mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3063{
3064  return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3065						    (__v4di) __W,
3066						    (__mmask8) __U);
3067}
3068
3069extern __inline __m256i
3070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3071_mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3072{
3073  return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3074						    (__v4di)
3075						    _mm256_setzero_si256 (),
3076						    (__mmask8) __U);
3077}
3078
3079extern __inline __m128i
3080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3081_mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3082{
3083  return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3084						    (__v2di) __W,
3085						    (__mmask8) __U);
3086}
3087
3088extern __inline __m128i
3089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3090_mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3091{
3092  return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3093						    (__v2di)
3094						    _mm_setzero_si128 (),
3095						    (__mmask8) __U);
3096}
3097
3098extern __inline __m256i
3099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3100_mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
3101{
3102  return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3103						    (__v4di) __W,
3104						    (__mmask8) __U);
3105}
3106
3107extern __inline __m256i
3108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3109_mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3110{
3111  return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3112						    (__v4di)
3113						    _mm256_setzero_si256 (),
3114						    (__mmask8) __U);
3115}
3116
3117extern __inline __m128i
3118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3119_mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
3120{
3121  return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3122						    (__v2di) __W,
3123						    (__mmask8) __U);
3124}
3125
3126extern __inline __m128i
3127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3128_mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3129{
3130  return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3131						    (__v2di)
3132						    _mm_setzero_si128 (),
3133						    (__mmask8) __U);
3134}
3135
3136extern __inline __m256d
3137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3138_mm256_rcp14_pd (__m256d __A)
3139{
3140  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3141					      (__v4df)
3142					      _mm256_setzero_pd (),
3143					      (__mmask8) -1);
3144}
3145
3146extern __inline __m256d
3147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3148_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3149{
3150  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3151					      (__v4df) __W,
3152					      (__mmask8) __U);
3153}
3154
3155extern __inline __m256d
3156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3157_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
3158{
3159  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3160					      (__v4df)
3161					      _mm256_setzero_pd (),
3162					      (__mmask8) __U);
3163}
3164
3165extern __inline __m128d
3166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3167_mm_rcp14_pd (__m128d __A)
3168{
3169  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3170					      (__v2df)
3171					      _mm_setzero_pd (),
3172					      (__mmask8) -1);
3173}
3174
3175extern __inline __m128d
3176__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3177_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3178{
3179  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3180					      (__v2df) __W,
3181					      (__mmask8) __U);
3182}
3183
3184extern __inline __m128d
3185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3186_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
3187{
3188  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3189					      (__v2df)
3190					      _mm_setzero_pd (),
3191					      (__mmask8) __U);
3192}
3193
3194extern __inline __m256
3195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3196_mm256_rcp14_ps (__m256 __A)
3197{
3198  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3199					     (__v8sf)
3200					     _mm256_setzero_ps (),
3201					     (__mmask8) -1);
3202}
3203
3204extern __inline __m256
3205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3206_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3207{
3208  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3209					     (__v8sf) __W,
3210					     (__mmask8) __U);
3211}
3212
3213extern __inline __m256
3214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3215_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
3216{
3217  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3218					     (__v8sf)
3219					     _mm256_setzero_ps (),
3220					     (__mmask8) __U);
3221}
3222
3223extern __inline __m128
3224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3225_mm_rcp14_ps (__m128 __A)
3226{
3227  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3228					     (__v4sf)
3229					     _mm_setzero_ps (),
3230					     (__mmask8) -1);
3231}
3232
3233extern __inline __m128
3234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3235_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3236{
3237  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3238					     (__v4sf) __W,
3239					     (__mmask8) __U);
3240}
3241
3242extern __inline __m128
3243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3244_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
3245{
3246  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3247					     (__v4sf)
3248					     _mm_setzero_ps (),
3249					     (__mmask8) __U);
3250}
3251
3252extern __inline __m256d
3253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3254_mm256_rsqrt14_pd (__m256d __A)
3255{
3256  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3257						     (__v4df)
3258						     _mm256_setzero_pd (),
3259						     (__mmask8) -1);
3260}
3261
3262extern __inline __m256d
3263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3264_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3265{
3266  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3267						     (__v4df) __W,
3268						     (__mmask8) __U);
3269}
3270
3271extern __inline __m256d
3272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3273_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
3274{
3275  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3276						     (__v4df)
3277						     _mm256_setzero_pd (),
3278						     (__mmask8) __U);
3279}
3280
3281extern __inline __m128d
3282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3283_mm_rsqrt14_pd (__m128d __A)
3284{
3285  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3286						     (__v2df)
3287						     _mm_setzero_pd (),
3288						     (__mmask8) -1);
3289}
3290
3291extern __inline __m128d
3292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3293_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3294{
3295  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3296						     (__v2df) __W,
3297						     (__mmask8) __U);
3298}
3299
3300extern __inline __m128d
3301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3302_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
3303{
3304  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3305						     (__v2df)
3306						     _mm_setzero_pd (),
3307						     (__mmask8) __U);
3308}
3309
3310extern __inline __m256
3311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3312_mm256_rsqrt14_ps (__m256 __A)
3313{
3314  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3315						    (__v8sf)
3316						    _mm256_setzero_ps (),
3317						    (__mmask8) -1);
3318}
3319
3320extern __inline __m256
3321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3322_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3323{
3324  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3325						    (__v8sf) __W,
3326						    (__mmask8) __U);
3327}
3328
3329extern __inline __m256
3330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3331_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
3332{
3333  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3334						    (__v8sf)
3335						    _mm256_setzero_ps (),
3336						    (__mmask8) __U);
3337}
3338
3339extern __inline __m128
3340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3341_mm_rsqrt14_ps (__m128 __A)
3342{
3343  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3344						    (__v4sf)
3345						    _mm_setzero_ps (),
3346						    (__mmask8) -1);
3347}
3348
3349extern __inline __m128
3350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3351_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3352{
3353  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3354						    (__v4sf) __W,
3355						    (__mmask8) __U);
3356}
3357
3358extern __inline __m128
3359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3360_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
3361{
3362  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3363						    (__v4sf)
3364						    _mm_setzero_ps (),
3365						    (__mmask8) __U);
3366}
3367
3368extern __inline __m256d
3369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3370_mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
3371{
3372  return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3373						  (__v4df) __W,
3374						  (__mmask8) __U);
3375}
3376
3377extern __inline __m256d
3378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3379_mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
3380{
3381  return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3382						  (__v4df)
3383						  _mm256_setzero_pd (),
3384						  (__mmask8) __U);
3385}
3386
3387extern __inline __m128d
3388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3389_mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
3390{
3391  return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3392						  (__v2df) __W,
3393						  (__mmask8) __U);
3394}
3395
3396extern __inline __m128d
3397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3398_mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
3399{
3400  return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3401						  (__v2df)
3402						  _mm_setzero_pd (),
3403						  (__mmask8) __U);
3404}
3405
3406extern __inline __m256
3407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3408_mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
3409{
3410  return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3411						 (__v8sf) __W,
3412						 (__mmask8) __U);
3413}
3414
3415extern __inline __m256
3416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3417_mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
3418{
3419  return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3420						 (__v8sf)
3421						 _mm256_setzero_ps (),
3422						 (__mmask8) __U);
3423}
3424
3425extern __inline __m128
3426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3427_mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
3428{
3429  return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3430						 (__v4sf) __W,
3431						 (__mmask8) __U);
3432}
3433
3434extern __inline __m128
3435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3436_mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
3437{
3438  return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3439						 (__v4sf)
3440						 _mm_setzero_ps (),
3441						 (__mmask8) __U);
3442}
3443
3444extern __inline __m256i
3445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3446_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3447		       __m256i __B)
3448{
3449  return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3450						 (__v8si) __B,
3451						 (__v8si) __W,
3452						 (__mmask8) __U);
3453}
3454
3455extern __inline __m256i
3456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3457_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3458{
3459  return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3460						 (__v8si) __B,
3461						 (__v8si)
3462						 _mm256_setzero_si256 (),
3463						 (__mmask8) __U);
3464}
3465
3466extern __inline __m256i
3467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3468_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3469		       __m256i __B)
3470{
3471  return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3472						 (__v4di) __B,
3473						 (__v4di) __W,
3474						 (__mmask8) __U);
3475}
3476
3477extern __inline __m256i
3478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3479_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3480{
3481  return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3482						 (__v4di) __B,
3483						 (__v4di)
3484						 _mm256_setzero_si256 (),
3485						 (__mmask8) __U);
3486}
3487
3488extern __inline __m256i
3489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3490_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3491		       __m256i __B)
3492{
3493  return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3494						 (__v8si) __B,
3495						 (__v8si) __W,
3496						 (__mmask8) __U);
3497}
3498
3499extern __inline __m256i
3500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3501_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3502{
3503  return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3504						 (__v8si) __B,
3505						 (__v8si)
3506						 _mm256_setzero_si256 (),
3507						 (__mmask8) __U);
3508}
3509
3510extern __inline __m256i
3511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3512_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3513		       __m256i __B)
3514{
3515  return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3516						 (__v4di) __B,
3517						 (__v4di) __W,
3518						 (__mmask8) __U);
3519}
3520
3521extern __inline __m256i
3522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3523_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3524{
3525  return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3526						 (__v4di) __B,
3527						 (__v4di)
3528						 _mm256_setzero_si256 (),
3529						 (__mmask8) __U);
3530}
3531
3532extern __inline __m128i
3533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3534_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3535		    __m128i __B)
3536{
3537  return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3538						 (__v4si) __B,
3539						 (__v4si) __W,
3540						 (__mmask8) __U);
3541}
3542
3543extern __inline __m128i
3544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3545_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3546{
3547  return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3548						 (__v4si) __B,
3549						 (__v4si)
3550						 _mm_setzero_si128 (),
3551						 (__mmask8) __U);
3552}
3553
3554extern __inline __m128i
3555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3556_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3557		    __m128i __B)
3558{
3559  return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3560						 (__v2di) __B,
3561						 (__v2di) __W,
3562						 (__mmask8) __U);
3563}
3564
3565extern __inline __m128i
3566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3567_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3568{
3569  return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3570						 (__v2di) __B,
3571						 (__v2di)
3572						 _mm_setzero_si128 (),
3573						 (__mmask8) __U);
3574}
3575
3576extern __inline __m128i
3577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3578_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3579		    __m128i __B)
3580{
3581  return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3582						 (__v4si) __B,
3583						 (__v4si) __W,
3584						 (__mmask8) __U);
3585}
3586
3587extern __inline __m128i
3588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3589_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3590{
3591  return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3592						 (__v4si) __B,
3593						 (__v4si)
3594						 _mm_setzero_si128 (),
3595						 (__mmask8) __U);
3596}
3597
3598extern __inline __m128i
3599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3600_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3601		    __m128i __B)
3602{
3603  return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3604						 (__v2di) __B,
3605						 (__v2di) __W,
3606						 (__mmask8) __U);
3607}
3608
3609extern __inline __m128i
3610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3611_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3612{
3613  return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3614						 (__v2di) __B,
3615						 (__v2di)
3616						 _mm_setzero_si128 (),
3617						 (__mmask8) __U);
3618}
3619
3620extern __inline __m256
3621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3622_mm256_getexp_ps (__m256 __A)
3623{
3624  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3625						   (__v8sf)
3626						   _mm256_setzero_ps (),
3627						   (__mmask8) -1);
3628}
3629
3630extern __inline __m256
3631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3632_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
3633{
3634  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3635						   (__v8sf) __W,
3636						   (__mmask8) __U);
3637}
3638
3639extern __inline __m256
3640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3641_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
3642{
3643  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3644						   (__v8sf)
3645						   _mm256_setzero_ps (),
3646						   (__mmask8) __U);
3647}
3648
3649extern __inline __m256d
3650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3651_mm256_getexp_pd (__m256d __A)
3652{
3653  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3654						    (__v4df)
3655						    _mm256_setzero_pd (),
3656						    (__mmask8) -1);
3657}
3658
3659extern __inline __m256d
3660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3661_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
3662{
3663  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3664						    (__v4df) __W,
3665						    (__mmask8) __U);
3666}
3667
3668extern __inline __m256d
3669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3670_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
3671{
3672  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3673						    (__v4df)
3674						    _mm256_setzero_pd (),
3675						    (__mmask8) __U);
3676}
3677
3678extern __inline __m128
3679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3680_mm_getexp_ps (__m128 __A)
3681{
3682  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3683						   (__v4sf)
3684						   _mm_setzero_ps (),
3685						   (__mmask8) -1);
3686}
3687
3688extern __inline __m128
3689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3690_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
3691{
3692  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3693						   (__v4sf) __W,
3694						   (__mmask8) __U);
3695}
3696
3697extern __inline __m128
3698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3699_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
3700{
3701  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3702						   (__v4sf)
3703						   _mm_setzero_ps (),
3704						   (__mmask8) __U);
3705}
3706
3707extern __inline __m128d
3708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3709_mm_getexp_pd (__m128d __A)
3710{
3711  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3712						    (__v2df)
3713						    _mm_setzero_pd (),
3714						    (__mmask8) -1);
3715}
3716
3717extern __inline __m128d
3718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3719_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
3720{
3721  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3722						    (__v2df) __W,
3723						    (__mmask8) __U);
3724}
3725
3726extern __inline __m128d
3727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3728_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
3729{
3730  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3731						    (__v2df)
3732						    _mm_setzero_pd (),
3733						    (__mmask8) __U);
3734}
3735
3736extern __inline __m256i
3737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738_mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3739		       __m128i __B)
3740{
3741  return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3742						 (__v4si) __B,
3743						 (__v8si) __W,
3744						 (__mmask8) __U);
3745}
3746
3747extern __inline __m256i
3748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3749_mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
3750{
3751  return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3752						 (__v4si) __B,
3753						 (__v8si)
3754						 _mm256_setzero_si256 (),
3755						 (__mmask8) __U);
3756}
3757
3758extern __inline __m128i
3759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3760_mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3761		    __m128i __B)
3762{
3763  return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3764						 (__v4si) __B,
3765						 (__v4si) __W,
3766						 (__mmask8) __U);
3767}
3768
3769extern __inline __m128i
3770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3771_mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3772{
3773  return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3774						 (__v4si) __B,
3775						 (__v4si)
3776						 _mm_setzero_si128 (),
3777						 (__mmask8) __U);
3778}
3779
3780extern __inline __m256i
3781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3782_mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3783		       __m128i __B)
3784{
3785  return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3786						 (__v2di) __B,
3787						 (__v4di) __W,
3788						 (__mmask8) __U);
3789}
3790
3791extern __inline __m256i
3792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3793_mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
3794{
3795  return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3796						 (__v2di) __B,
3797						 (__v4di)
3798						 _mm256_setzero_si256 (),
3799						 (__mmask8) __U);
3800}
3801
3802extern __inline __m128i
3803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3804_mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3805		    __m128i __B)
3806{
3807  return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3808						 (__v2di) __B,
3809						 (__v2di) __W,
3810						 (__mmask8) __U);
3811}
3812
3813extern __inline __m128i
3814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3815_mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3816{
3817  return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3818						 (__v2di) __B,
3819						 (__v2di)
3820						 _mm_setzero_di (),
3821						 (__mmask8) __U);
3822}
3823
3824extern __inline __m256i
3825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3826_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3827		       __m256i __B)
3828{
3829  return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3830						 (__v8si) __B,
3831						 (__v8si) __W,
3832						 (__mmask8) __U);
3833}
3834
3835extern __inline __m256i
3836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3837_mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3838{
3839  return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3840						 (__v8si) __B,
3841						 (__v8si)
3842						 _mm256_setzero_si256 (),
3843						 (__mmask8) __U);
3844}
3845
3846extern __inline __m256d
3847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3848_mm256_scalef_pd (__m256d __A, __m256d __B)
3849{
3850  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3851						    (__v4df) __B,
3852						    (__v4df)
3853						    _mm256_setzero_pd (),
3854						    (__mmask8) -1);
3855}
3856
3857extern __inline __m256d
3858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3860		       __m256d __B)
3861{
3862  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3863						    (__v4df) __B,
3864						    (__v4df) __W,
3865						    (__mmask8) __U);
3866}
3867
3868extern __inline __m256d
3869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3870_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
3871{
3872  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3873						    (__v4df) __B,
3874						    (__v4df)
3875						    _mm256_setzero_pd (),
3876						    (__mmask8) __U);
3877}
3878
3879extern __inline __m256
3880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3881_mm256_scalef_ps (__m256 __A, __m256 __B)
3882{
3883  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3884						   (__v8sf) __B,
3885						   (__v8sf)
3886						   _mm256_setzero_ps (),
3887						   (__mmask8) -1);
3888}
3889
3890extern __inline __m256
3891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3892_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3893		       __m256 __B)
3894{
3895  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3896						   (__v8sf) __B,
3897						   (__v8sf) __W,
3898						   (__mmask8) __U);
3899}
3900
3901extern __inline __m256
3902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3903_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
3904{
3905  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3906						   (__v8sf) __B,
3907						   (__v8sf)
3908						   _mm256_setzero_ps (),
3909						   (__mmask8) __U);
3910}
3911
3912extern __inline __m128d
3913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3914_mm_scalef_pd (__m128d __A, __m128d __B)
3915{
3916  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3917						    (__v2df) __B,
3918						    (__v2df)
3919						    _mm_setzero_pd (),
3920						    (__mmask8) -1);
3921}
3922
3923extern __inline __m128d
3924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3925_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3926		    __m128d __B)
3927{
3928  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3929						    (__v2df) __B,
3930						    (__v2df) __W,
3931						    (__mmask8) __U);
3932}
3933
3934extern __inline __m128d
3935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3936_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
3937{
3938  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3939						    (__v2df) __B,
3940						    (__v2df)
3941						    _mm_setzero_pd (),
3942						    (__mmask8) __U);
3943}
3944
3945extern __inline __m128
3946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3947_mm_scalef_ps (__m128 __A, __m128 __B)
3948{
3949  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3950						   (__v4sf) __B,
3951						   (__v4sf)
3952						   _mm_setzero_ps (),
3953						   (__mmask8) -1);
3954}
3955
3956extern __inline __m128
3957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3958_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
3959{
3960  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3961						   (__v4sf) __B,
3962						   (__v4sf) __W,
3963						   (__mmask8) __U);
3964}
3965
3966extern __inline __m128
3967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3968_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
3969{
3970  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3971						   (__v4sf) __B,
3972						   (__v4sf)
3973						   _mm_setzero_ps (),
3974						   (__mmask8) __U);
3975}
3976
3977extern __inline __m256d
3978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3979_mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
3980		      __m256d __C)
3981{
3982  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
3983						    (__v4df) __B,
3984						    (__v4df) __C,
3985						    (__mmask8) __U);
3986}
3987
3988extern __inline __m256d
3989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3990_mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
3991		       __mmask8 __U)
3992{
3993  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
3994						     (__v4df) __B,
3995						     (__v4df) __C,
3996						     (__mmask8) __U);
3997}
3998
3999extern __inline __m256d
4000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4001_mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4002		       __m256d __C)
4003{
4004  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4005						     (__v4df) __B,
4006						     (__v4df) __C,
4007						     (__mmask8) __U);
4008}
4009
4010extern __inline __m128d
4011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4012_mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4013{
4014  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4015						    (__v2df) __B,
4016						    (__v2df) __C,
4017						    (__mmask8) __U);
4018}
4019
4020extern __inline __m128d
4021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4022_mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4023		    __mmask8 __U)
4024{
4025  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
4026						     (__v2df) __B,
4027						     (__v2df) __C,
4028						     (__mmask8) __U);
4029}
4030
4031extern __inline __m128d
4032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4033_mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4034		    __m128d __C)
4035{
4036  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4037						     (__v2df) __B,
4038						     (__v2df) __C,
4039						     (__mmask8) __U);
4040}
4041
4042extern __inline __m256
4043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4044_mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4045{
4046  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4047						   (__v8sf) __B,
4048						   (__v8sf) __C,
4049						   (__mmask8) __U);
4050}
4051
4052extern __inline __m256
4053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4054_mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4055		       __mmask8 __U)
4056{
4057  return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
4058						    (__v8sf) __B,
4059						    (__v8sf) __C,
4060						    (__mmask8) __U);
4061}
4062
4063extern __inline __m256
4064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4065_mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4066		       __m256 __C)
4067{
4068  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4069						    (__v8sf) __B,
4070						    (__v8sf) __C,
4071						    (__mmask8) __U);
4072}
4073
4074extern __inline __m128
4075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4076_mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4077{
4078  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4079						   (__v4sf) __B,
4080						   (__v4sf) __C,
4081						   (__mmask8) __U);
4082}
4083
4084extern __inline __m128
4085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4086_mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4087{
4088  return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
4089						    (__v4sf) __B,
4090						    (__v4sf) __C,
4091						    (__mmask8) __U);
4092}
4093
4094extern __inline __m128
4095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4096_mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4097{
4098  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4099						    (__v4sf) __B,
4100						    (__v4sf) __C,
4101						    (__mmask8) __U);
4102}
4103
4104extern __inline __m256d
4105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4106_mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4107		      __m256d __C)
4108{
4109  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
4110						    (__v4df) __B,
4111						    -(__v4df) __C,
4112						    (__mmask8) __U);
4113}
4114
4115extern __inline __m256d
4116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4117_mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4118		       __mmask8 __U)
4119{
4120  return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
4121						     (__v4df) __B,
4122						     (__v4df) __C,
4123						     (__mmask8) __U);
4124}
4125
4126extern __inline __m256d
4127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4128_mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4129		       __m256d __C)
4130{
4131  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4132						     (__v4df) __B,
4133						     -(__v4df) __C,
4134						     (__mmask8) __U);
4135}
4136
4137extern __inline __m128d
4138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4139_mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4140{
4141  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4142						    (__v2df) __B,
4143						    -(__v2df) __C,
4144						    (__mmask8) __U);
4145}
4146
4147extern __inline __m128d
4148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4149_mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4150		    __mmask8 __U)
4151{
4152  return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
4153						     (__v2df) __B,
4154						     (__v2df) __C,
4155						     (__mmask8) __U);
4156}
4157
4158extern __inline __m128d
4159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4160_mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4161		    __m128d __C)
4162{
4163  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4164						     (__v2df) __B,
4165						     -(__v2df) __C,
4166						     (__mmask8) __U);
4167}
4168
4169extern __inline __m256
4170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4171_mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4172{
4173  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4174						   (__v8sf) __B,
4175						   -(__v8sf) __C,
4176						   (__mmask8) __U);
4177}
4178
4179extern __inline __m256
4180__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4181_mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4182		       __mmask8 __U)
4183{
4184  return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
4185						    (__v8sf) __B,
4186						    (__v8sf) __C,
4187						    (__mmask8) __U);
4188}
4189
4190extern __inline __m256
4191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4192_mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4193		       __m256 __C)
4194{
4195  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4196						    (__v8sf) __B,
4197						    -(__v8sf) __C,
4198						    (__mmask8) __U);
4199}
4200
4201extern __inline __m128
4202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4203_mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4204{
4205  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4206						   (__v4sf) __B,
4207						   -(__v4sf) __C,
4208						   (__mmask8) __U);
4209}
4210
4211extern __inline __m128
4212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4213_mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4214{
4215  return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
4216						    (__v4sf) __B,
4217						    (__v4sf) __C,
4218						    (__mmask8) __U);
4219}
4220
4221extern __inline __m128
4222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4223_mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4224{
4225  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4226						    (__v4sf) __B,
4227						    -(__v4sf) __C,
4228						    (__mmask8) __U);
4229}
4230
4231extern __inline __m256d
4232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4233_mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4234			 __m256d __C)
4235{
4236  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4237						       (__v4df) __B,
4238						       (__v4df) __C,
4239						       (__mmask8) __U);
4240}
4241
4242extern __inline __m256d
4243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4244_mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
4245			  __mmask8 __U)
4246{
4247  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
4248							(__v4df) __B,
4249							(__v4df) __C,
4250							(__mmask8)
4251							__U);
4252}
4253
4254extern __inline __m256d
4255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4256_mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4257			  __m256d __C)
4258{
4259  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4260							(__v4df) __B,
4261							(__v4df) __C,
4262							(__mmask8)
4263							__U);
4264}
4265
4266extern __inline __m128d
4267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4268_mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4269		      __m128d __C)
4270{
4271  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4272						       (__v2df) __B,
4273						       (__v2df) __C,
4274						       (__mmask8) __U);
4275}
4276
4277extern __inline __m128d
4278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4279_mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
4280		       __mmask8 __U)
4281{
4282  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
4283							(__v2df) __B,
4284							(__v2df) __C,
4285							(__mmask8)
4286							__U);
4287}
4288
4289extern __inline __m128d
4290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4291_mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4292		       __m128d __C)
4293{
4294  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4295							(__v2df) __B,
4296							(__v2df) __C,
4297							(__mmask8)
4298							__U);
4299}
4300
4301extern __inline __m256
4302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4303_mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4304			 __m256 __C)
4305{
4306  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4307						      (__v8sf) __B,
4308						      (__v8sf) __C,
4309						      (__mmask8) __U);
4310}
4311
4312extern __inline __m256
4313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4314_mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
4315			  __mmask8 __U)
4316{
4317  return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
4318						       (__v8sf) __B,
4319						       (__v8sf) __C,
4320						       (__mmask8) __U);
4321}
4322
4323extern __inline __m256
4324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4325_mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4326			  __m256 __C)
4327{
4328  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4329						       (__v8sf) __B,
4330						       (__v8sf) __C,
4331						       (__mmask8) __U);
4332}
4333
4334extern __inline __m128
4335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4336_mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4337{
4338  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4339						      (__v4sf) __B,
4340						      (__v4sf) __C,
4341						      (__mmask8) __U);
4342}
4343
4344extern __inline __m128
4345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4346_mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
4347		       __mmask8 __U)
4348{
4349  return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
4350						       (__v4sf) __B,
4351						       (__v4sf) __C,
4352						       (__mmask8) __U);
4353}
4354
4355extern __inline __m128
4356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4357_mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
4358		       __m128 __C)
4359{
4360  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4361						       (__v4sf) __B,
4362						       (__v4sf) __C,
4363						       (__mmask8) __U);
4364}
4365
4366extern __inline __m256d
4367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4368_mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4369			 __m256d __C)
4370{
4371  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4372						       (__v4df) __B,
4373						       -(__v4df) __C,
4374						       (__mmask8) __U);
4375}
4376
4377extern __inline __m256d
4378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4379_mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
4380			  __mmask8 __U)
4381{
4382  return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
4383							(__v4df) __B,
4384							(__v4df) __C,
4385							(__mmask8)
4386							__U);
4387}
4388
4389extern __inline __m256d
4390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4391_mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4392			  __m256d __C)
4393{
4394  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4395							(__v4df) __B,
4396							-(__v4df) __C,
4397							(__mmask8)
4398							__U);
4399}
4400
4401extern __inline __m128d
4402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4403_mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4404		      __m128d __C)
4405{
4406  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4407						       (__v2df) __B,
4408						       -(__v2df) __C,
4409						       (__mmask8) __U);
4410}
4411
4412extern __inline __m128d
4413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4414_mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
4415		       __mmask8 __U)
4416{
4417  return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
4418							(__v2df) __B,
4419							(__v2df) __C,
4420							(__mmask8)
4421							__U);
4422}
4423
4424extern __inline __m128d
4425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4426_mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4427		       __m128d __C)
4428{
4429  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4430							(__v2df) __B,
4431							-(__v2df) __C,
4432							(__mmask8)
4433							__U);
4434}
4435
4436extern __inline __m256
4437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4438_mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4439			 __m256 __C)
4440{
4441  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4442						      (__v8sf) __B,
4443						      -(__v8sf) __C,
4444						      (__mmask8) __U);
4445}
4446
4447extern __inline __m256
4448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4449_mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
4450			  __mmask8 __U)
4451{
4452  return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
4453						       (__v8sf) __B,
4454						       (__v8sf) __C,
4455						       (__mmask8) __U);
4456}
4457
4458extern __inline __m256
4459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4460_mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4461			  __m256 __C)
4462{
4463  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4464						       (__v8sf) __B,
4465						       -(__v8sf) __C,
4466						       (__mmask8) __U);
4467}
4468
4469extern __inline __m128
4470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4471_mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4472{
4473  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4474						      (__v4sf) __B,
4475						      -(__v4sf) __C,
4476						      (__mmask8) __U);
4477}
4478
4479extern __inline __m128
4480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4481_mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
4482		       __mmask8 __U)
4483{
4484  return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
4485						       (__v4sf) __B,
4486						       (__v4sf) __C,
4487						       (__mmask8) __U);
4488}
4489
4490extern __inline __m128
4491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4492_mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
4493		       __m128 __C)
4494{
4495  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4496						       (__v4sf) __B,
4497						       -(__v4sf) __C,
4498						       (__mmask8) __U);
4499}
4500
4501extern __inline __m256d
4502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4503_mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4504		       __m256d __C)
4505{
4506  return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
4507						     (__v4df) __B,
4508						     (__v4df) __C,
4509						     (__mmask8) __U);
4510}
4511
4512extern __inline __m256d
4513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4514_mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4515			__mmask8 __U)
4516{
4517  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
4518						     (__v4df) __B,
4519						     (__v4df) __C,
4520						     (__mmask8) __U);
4521}
4522
4523extern __inline __m256d
4524__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4525_mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4526			__m256d __C)
4527{
4528  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4529						     (__v4df) __B,
4530						     (__v4df) __C,
4531						     (__mmask8) __U);
4532}
4533
4534extern __inline __m128d
4535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4536_mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4537		    __m128d __C)
4538{
4539  return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
4540						     (__v2df) __B,
4541						     (__v2df) __C,
4542						     (__mmask8) __U);
4543}
4544
4545extern __inline __m128d
4546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4547_mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4548		     __mmask8 __U)
4549{
4550  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
4551						     (__v2df) __B,
4552						     (__v2df) __C,
4553						     (__mmask8) __U);
4554}
4555
4556extern __inline __m128d
4557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4558_mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4559		     __m128d __C)
4560{
4561  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4562						     (__v2df) __B,
4563						     (__v2df) __C,
4564						     (__mmask8) __U);
4565}
4566
4567extern __inline __m256
4568__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4569_mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4570		       __m256 __C)
4571{
4572  return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
4573						    (__v8sf) __B,
4574						    (__v8sf) __C,
4575						    (__mmask8) __U);
4576}
4577
4578extern __inline __m256
4579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4580_mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4581			__mmask8 __U)
4582{
4583  return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
4584						    (__v8sf) __B,
4585						    (__v8sf) __C,
4586						    (__mmask8) __U);
4587}
4588
4589extern __inline __m256
4590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4591_mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4592			__m256 __C)
4593{
4594  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4595						    (__v8sf) __B,
4596						    (__v8sf) __C,
4597						    (__mmask8) __U);
4598}
4599
4600extern __inline __m128
4601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4602_mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4603{
4604  return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
4605						    (__v4sf) __B,
4606						    (__v4sf) __C,
4607						    (__mmask8) __U);
4608}
4609
4610extern __inline __m128
4611__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4612_mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4613{
4614  return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
4615						    (__v4sf) __B,
4616						    (__v4sf) __C,
4617						    (__mmask8) __U);
4618}
4619
4620extern __inline __m128
4621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4622_mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4623{
4624  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4625						    (__v4sf) __B,
4626						    (__v4sf) __C,
4627						    (__mmask8) __U);
4628}
4629
4630extern __inline __m256d
4631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4632_mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4633		       __m256d __C)
4634{
4635  return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
4636						     (__v4df) __B,
4637						     (__v4df) __C,
4638						     (__mmask8) __U);
4639}
4640
4641extern __inline __m256d
4642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4643_mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4644			__mmask8 __U)
4645{
4646  return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
4647						      (__v4df) __B,
4648						      (__v4df) __C,
4649						      (__mmask8) __U);
4650}
4651
4652extern __inline __m256d
4653__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4654_mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4655			__m256d __C)
4656{
4657  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4658						     (__v4df) __B,
4659						     -(__v4df) __C,
4660						     (__mmask8) __U);
4661}
4662
4663extern __inline __m128d
4664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4665_mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4666		    __m128d __C)
4667{
4668  return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
4669						     (__v2df) __B,
4670						     (__v2df) __C,
4671						     (__mmask8) __U);
4672}
4673
4674extern __inline __m128d
4675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4676_mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4677		     __mmask8 __U)
4678{
4679  return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
4680						      (__v2df) __B,
4681						      (__v2df) __C,
4682						      (__mmask8) __U);
4683}
4684
4685extern __inline __m128d
4686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4687_mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4688		     __m128d __C)
4689{
4690  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4691						     (__v2df) __B,
4692						     -(__v2df) __C,
4693						     (__mmask8) __U);
4694}
4695
4696extern __inline __m256
4697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4698_mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4699		       __m256 __C)
4700{
4701  return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
4702						    (__v8sf) __B,
4703						    (__v8sf) __C,
4704						    (__mmask8) __U);
4705}
4706
4707extern __inline __m256
4708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4709_mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4710			__mmask8 __U)
4711{
4712  return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
4713						     (__v8sf) __B,
4714						     (__v8sf) __C,
4715						     (__mmask8) __U);
4716}
4717
4718extern __inline __m256
4719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4720_mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4721			__m256 __C)
4722{
4723  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4724						    (__v8sf) __B,
4725						    -(__v8sf) __C,
4726						    (__mmask8) __U);
4727}
4728
4729extern __inline __m128
4730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4731_mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4732{
4733  return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
4734						    (__v4sf) __B,
4735						    (__v4sf) __C,
4736						    (__mmask8) __U);
4737}
4738
4739extern __inline __m128
4740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4741_mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4742{
4743  return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
4744						     (__v4sf) __B,
4745						     (__v4sf) __C,
4746						     (__mmask8) __U);
4747}
4748
4749extern __inline __m128
4750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4751_mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4752{
4753  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4754						    (__v4sf) __B,
4755						    -(__v4sf) __C,
4756						    (__mmask8) __U);
4757}
4758
4759extern __inline __m128i
4760__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4761_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4762		    __m128i __B)
4763{
4764  return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4765						 (__v4si) __B,
4766						 (__v4si) __W,
4767						 (__mmask8) __U);
4768}
4769
4770extern __inline __m128i
4771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4772_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4773{
4774  return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4775						 (__v4si) __B,
4776						 (__v4si)
4777						 _mm_setzero_si128 (),
4778						 (__mmask8) __U);
4779}
4780
4781extern __inline __m256i
4782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4783_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4784			  __m256i __B)
4785{
4786  return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4787						  (__v8si) __B,
4788						  (__v8si) __W,
4789						  (__mmask8) __U);
4790}
4791
4792extern __inline __m256i
4793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4794_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4795{
4796  return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4797						  (__v8si) __B,
4798						  (__v8si)
4799						  _mm256_setzero_si256 (),
4800						  (__mmask8) __U);
4801}
4802
4803extern __inline __m128i
4804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4805_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4806		       __m128i __B)
4807{
4808  return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4809						  (__v4si) __B,
4810						  (__v4si) __W,
4811						  (__mmask8) __U);
4812}
4813
4814extern __inline __m128i
4815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4816_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4817{
4818  return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4819						  (__v4si) __B,
4820						  (__v4si)
4821						  _mm_setzero_si128 (),
4822						  (__mmask8) __U);
4823}
4824
4825extern __inline __m256i
4826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4827_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4828		      __m256i __B)
4829{
4830  return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4831						(__v8si) __B,
4832						(__v8si) __W,
4833						(__mmask8) __U);
4834}
4835
4836extern __inline __m256i
4837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4838_mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4839{
4840  return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4841						(__v8si) __B,
4842						(__v8si)
4843						_mm256_setzero_si256 (),
4844						(__mmask8) __U);
4845}
4846
4847extern __inline __m128i
4848__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4849_mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4850{
4851  return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4852						(__v4si) __B,
4853						(__v4si) __W,
4854						(__mmask8) __U);
4855}
4856
4857extern __inline __m128i
4858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4859_mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4860{
4861  return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4862						(__v4si) __B,
4863						(__v4si)
4864						_mm_setzero_si128 (),
4865						(__mmask8) __U);
4866}
4867
4868extern __inline __m256i
4869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4870_mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4871		       __m256i __B)
4872{
4873  return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4874						 (__v8si) __B,
4875						 (__v8si) __W,
4876						 (__mmask8) __U);
4877}
4878
4879extern __inline __m256i
4880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4881_mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4882{
4883  return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4884						 (__v8si) __B,
4885						 (__v8si)
4886						 _mm256_setzero_si256 (),
4887						 (__mmask8) __U);
4888}
4889
4890extern __inline __m128i
4891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4892_mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4893		    __m128i __B)
4894{
4895  return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4896						 (__v4si) __B,
4897						 (__v4si) __W,
4898						 (__mmask8) __U);
4899}
4900
4901extern __inline __m128i
4902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4903_mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4904{
4905  return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4906						 (__v4si) __B,
4907						 (__v4si)
4908						 _mm_setzero_si128 (),
4909						 (__mmask8) __U);
4910}
4911
4912extern __inline __m128
4913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4914_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
4915{
4916  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4917						(__v4sf) __W,
4918						(__mmask8) __U);
4919}
4920
4921extern __inline __m128
4922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4923_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
4924{
4925  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4926						(__v4sf)
4927						_mm_setzero_ps (),
4928						(__mmask8) __U);
4929}
4930
4931extern __inline __m128
4932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4933_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
4934{
4935  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4936						   (__v4sf) __W,
4937						   (__mmask8) __U);
4938}
4939
4940extern __inline __m128
4941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4942_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
4943{
4944  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4945						   (__v4sf)
4946						   _mm_setzero_ps (),
4947						   (__mmask8) __U);
4948}
4949
4950extern __inline __m256i
4951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4952_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
4953{
4954  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4955						    (__v8si) __W,
4956						    (__mmask8) __U);
4957}
4958
4959extern __inline __m256i
4960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4961_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
4962{
4963  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4964						    (__v8si)
4965						    _mm256_setzero_si256 (),
4966						    (__mmask8) __U);
4967}
4968
4969extern __inline __m128i
4970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4971_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
4972{
4973  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
4974						    (__v4si) __W,
4975						    (__mmask8) __U);
4976}
4977
4978extern __inline __m128i
4979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4980_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
4981{
4982  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
4983						    (__v4si)
4984						    _mm_setzero_si128 (),
4985						    (__mmask8) __U);
4986}
4987
4988extern __inline __m256i
4989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4990_mm256_cvtps_epu32 (__m256 __A)
4991{
4992  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
4993						     (__v8si)
4994						     _mm256_setzero_si256 (),
4995						     (__mmask8) -1);
4996}
4997
4998extern __inline __m256i
4999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5000_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
5001{
5002  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5003						     (__v8si) __W,
5004						     (__mmask8) __U);
5005}
5006
5007extern __inline __m256i
5008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5009_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
5010{
5011  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5012						     (__v8si)
5013						     _mm256_setzero_si256 (),
5014						     (__mmask8) __U);
5015}
5016
5017extern __inline __m128i
5018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5019_mm_cvtps_epu32 (__m128 __A)
5020{
5021  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5022						     (__v4si)
5023						     _mm_setzero_si128 (),
5024						     (__mmask8) -1);
5025}
5026
5027extern __inline __m128i
5028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5029_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
5030{
5031  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5032						     (__v4si) __W,
5033						     (__mmask8) __U);
5034}
5035
5036extern __inline __m128i
5037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5038_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
5039{
5040  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5041						     (__v4si)
5042						     _mm_setzero_si128 (),
5043						     (__mmask8) __U);
5044}
5045
5046extern __inline __m256d
5047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5048_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5049{
5050  return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5051						   (__v4df) __W,
5052						   (__mmask8) __U);
5053}
5054
5055extern __inline __m256d
5056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5057_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5058{
5059  return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5060						   (__v4df)
5061						   _mm256_setzero_pd (),
5062						   (__mmask8) __U);
5063}
5064
5065extern __inline __m128d
5066__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5067_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5068{
5069  return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5070						   (__v2df) __W,
5071						   (__mmask8) __U);
5072}
5073
5074extern __inline __m128d
5075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5076_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5077{
5078  return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5079						   (__v2df)
5080						   _mm_setzero_pd (),
5081						   (__mmask8) __U);
5082}
5083
5084extern __inline __m256
5085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5086_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5087{
5088  return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5089						   (__v8sf) __W,
5090						   (__mmask8) __U);
5091}
5092
5093extern __inline __m256
5094__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5095_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
5096{
5097  return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5098						   (__v8sf)
5099						   _mm256_setzero_ps (),
5100						   (__mmask8) __U);
5101}
5102
5103extern __inline __m128
5104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5105_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5106{
5107  return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5108						   (__v4sf) __W,
5109						   (__mmask8) __U);
5110}
5111
5112extern __inline __m128
5113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5114_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
5115{
5116  return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5117						   (__v4sf)
5118						   _mm_setzero_ps (),
5119						   (__mmask8) __U);
5120}
5121
5122extern __inline __m256
5123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5124_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5125{
5126  return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5127						   (__v8sf) __W,
5128						   (__mmask8) __U);
5129}
5130
5131extern __inline __m256
5132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5133_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
5134{
5135  return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5136						   (__v8sf)
5137						   _mm256_setzero_ps (),
5138						   (__mmask8) __U);
5139}
5140
5141extern __inline __m128
5142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5143_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5144{
5145  return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5146						   (__v4sf) __W,
5147						   (__mmask8) __U);
5148}
5149
5150extern __inline __m128
5151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5152_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
5153{
5154  return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5155						   (__v4sf)
5156						   _mm_setzero_ps (),
5157						   (__mmask8) __U);
5158}
5159
5160extern __inline __m128i
5161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5162_mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5163			 __m128i __B)
5164{
5165  return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5166						     (__v4si) __B,
5167						     (__v4si) __W,
5168						     (__mmask8) __U);
5169}
5170
5171extern __inline __m128i
5172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5173_mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5174{
5175  return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5176						     (__v4si) __B,
5177						     (__v4si)
5178						     _mm_setzero_si128 (),
5179						     (__mmask8) __U);
5180}
5181
5182extern __inline __m256i
5183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5184_mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5185			    __m256i __B)
5186{
5187  return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5188						     (__v8si) __B,
5189						     (__v8si) __W,
5190						     (__mmask8) __U);
5191}
5192
5193extern __inline __m256i
5194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5195_mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5196{
5197  return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5198						     (__v8si) __B,
5199						     (__v8si)
5200						     _mm256_setzero_si256 (),
5201						     (__mmask8) __U);
5202}
5203
5204extern __inline __m128i
5205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5206_mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5207			 __m128i __B)
5208{
5209  return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5210						      (__v2di) __B,
5211						      (__v2di) __W,
5212						      (__mmask8) __U);
5213}
5214
5215extern __inline __m128i
5216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5217_mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5218{
5219  return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5220						      (__v2di) __B,
5221						      (__v2di)
5222						      _mm_setzero_di (),
5223						      (__mmask8) __U);
5224}
5225
5226extern __inline __m256i
5227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5228_mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5229			    __m256i __B)
5230{
5231  return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5232						      (__v4di) __B,
5233						      (__v4di) __W,
5234						      (__mmask8) __U);
5235}
5236
5237extern __inline __m256i
5238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5239_mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5240{
5241  return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5242						      (__v4di) __B,
5243						      (__v4di)
5244						      _mm256_setzero_si256 (),
5245						      (__mmask8) __U);
5246}
5247
5248extern __inline __m128i
5249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5250_mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5251			 __m128i __B)
5252{
5253  return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5254						     (__v4si) __B,
5255						     (__v4si) __W,
5256						     (__mmask8) __U);
5257}
5258
5259extern __inline __m128i
5260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5261_mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5262{
5263  return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5264						     (__v4si) __B,
5265						     (__v4si)
5266						     _mm_setzero_si128 (),
5267						     (__mmask8) __U);
5268}
5269
5270extern __inline __m256i
5271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5272_mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5273			    __m256i __B)
5274{
5275  return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5276						     (__v8si) __B,
5277						     (__v8si) __W,
5278						     (__mmask8) __U);
5279}
5280
5281extern __inline __m256i
5282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5283_mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5284{
5285  return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5286						     (__v8si) __B,
5287						     (__v8si)
5288						     _mm256_setzero_si256 (),
5289						     (__mmask8) __U);
5290}
5291
5292extern __inline __m128i
5293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5294_mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5295			 __m128i __B)
5296{
5297  return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5298						      (__v2di) __B,
5299						      (__v2di) __W,
5300						      (__mmask8) __U);
5301}
5302
5303extern __inline __m128i
5304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5305_mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5306{
5307  return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5308						      (__v2di) __B,
5309						      (__v2di)
5310						      _mm_setzero_di (),
5311						      (__mmask8) __U);
5312}
5313
5314extern __inline __m256i
5315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5316_mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5317			    __m256i __B)
5318{
5319  return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5320						      (__v4di) __B,
5321						      (__v4di) __W,
5322						      (__mmask8) __U);
5323}
5324
5325extern __inline __m256i
5326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5327_mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5328{
5329  return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5330						      (__v4di) __B,
5331						      (__v4di)
5332						      _mm256_setzero_si256 (),
5333						      (__mmask8) __U);
5334}
5335
5336extern __inline __mmask8
5337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5338_mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
5339{
5340  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5341						   (__v4si) __B, 0,
5342						   (__mmask8) -1);
5343}
5344
5345extern __inline __mmask8
5346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5347_mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
5348{
5349  return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5350						    (__v4si) __B,
5351						    (__mmask8) -1);
5352}
5353
5354extern __inline __mmask8
5355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5356_mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5357{
5358  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5359						   (__v4si) __B, 0, __U);
5360}
5361
5362extern __inline __mmask8
5363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5364_mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5365{
5366  return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5367						    (__v4si) __B, __U);
5368}
5369
5370extern __inline __mmask8
5371__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5372_mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
5373{
5374  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5375						   (__v8si) __B, 0,
5376						   (__mmask8) -1);
5377}
5378
5379extern __inline __mmask8
5380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5381_mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
5382{
5383  return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5384						    (__v8si) __B,
5385						    (__mmask8) -1);
5386}
5387
5388extern __inline __mmask8
5389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5390_mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5391{
5392  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5393						   (__v8si) __B, 0, __U);
5394}
5395
5396extern __inline __mmask8
5397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5398_mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5399{
5400  return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5401						    (__v8si) __B, __U);
5402}
5403
5404extern __inline __mmask8
5405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5406_mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
5407{
5408  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5409						   (__v2di) __B, 0,
5410						   (__mmask8) -1);
5411}
5412
5413extern __inline __mmask8
5414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5415_mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
5416{
5417  return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5418						    (__v2di) __B,
5419						    (__mmask8) -1);
5420}
5421
5422extern __inline __mmask8
5423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5424_mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5425{
5426  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5427						   (__v2di) __B, 0, __U);
5428}
5429
5430extern __inline __mmask8
5431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5432_mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5433{
5434  return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5435						    (__v2di) __B, __U);
5436}
5437
5438extern __inline __mmask8
5439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5440_mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
5441{
5442  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5443						   (__v4di) __B, 0,
5444						   (__mmask8) -1);
5445}
5446
5447extern __inline __mmask8
5448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5449_mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
5450{
5451  return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5452						    (__v4di) __B,
5453						    (__mmask8) -1);
5454}
5455
5456extern __inline __mmask8
5457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5458_mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5459{
5460  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5461						   (__v4di) __B, 0, __U);
5462}
5463
5464extern __inline __mmask8
5465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5466_mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5467{
5468  return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5469						    (__v4di) __B, __U);
5470}
5471
5472extern __inline __mmask8
5473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5474_mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
5475{
5476  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5477						   (__v4si) __B, 6,
5478						   (__mmask8) -1);
5479}
5480
5481extern __inline __mmask8
5482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5483_mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
5484{
5485  return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5486						    (__v4si) __B,
5487						    (__mmask8) -1);
5488}
5489
5490extern __inline __mmask8
5491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5492_mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5493{
5494  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5495						   (__v4si) __B, 6, __U);
5496}
5497
5498extern __inline __mmask8
5499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5500_mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5501{
5502  return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5503						    (__v4si) __B, __U);
5504}
5505
5506extern __inline __mmask8
5507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5508_mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
5509{
5510  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5511						   (__v8si) __B, 6,
5512						   (__mmask8) -1);
5513}
5514
5515extern __inline __mmask8
5516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5517_mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
5518{
5519  return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5520						    (__v8si) __B,
5521						    (__mmask8) -1);
5522}
5523
5524extern __inline __mmask8
5525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5526_mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5527{
5528  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5529						   (__v8si) __B, 6, __U);
5530}
5531
5532extern __inline __mmask8
5533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5534_mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5535{
5536  return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5537						    (__v8si) __B, __U);
5538}
5539
5540extern __inline __mmask8
5541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5542_mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
5543{
5544  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5545						   (__v2di) __B, 6,
5546						   (__mmask8) -1);
5547}
5548
5549extern __inline __mmask8
5550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5551_mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
5552{
5553  return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5554						    (__v2di) __B,
5555						    (__mmask8) -1);
5556}
5557
5558extern __inline __mmask8
5559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5560_mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5561{
5562  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5563						   (__v2di) __B, 6, __U);
5564}
5565
5566extern __inline __mmask8
5567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5568_mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5569{
5570  return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5571						    (__v2di) __B, __U);
5572}
5573
5574extern __inline __mmask8
5575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5576_mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
5577{
5578  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5579						   (__v4di) __B, 6,
5580						   (__mmask8) -1);
5581}
5582
5583extern __inline __mmask8
5584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5585_mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
5586{
5587  return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5588						    (__v4di) __B,
5589						    (__mmask8) -1);
5590}
5591
5592extern __inline __mmask8
5593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5594_mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5595{
5596  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5597						   (__v4di) __B, 6, __U);
5598}
5599
5600extern __inline __mmask8
5601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5602_mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5603{
5604  return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5605						    (__v4di) __B, __U);
5606}
5607
5608extern __inline __mmask8
5609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5610_mm_test_epi32_mask (__m128i __A, __m128i __B)
5611{
5612  return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5613					       (__v4si) __B,
5614					       (__mmask8) -1);
5615}
5616
5617extern __inline __mmask8
5618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5619_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5620{
5621  return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5622					       (__v4si) __B, __U);
5623}
5624
5625extern __inline __mmask8
5626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5627_mm256_test_epi32_mask (__m256i __A, __m256i __B)
5628{
5629  return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5630					       (__v8si) __B,
5631					       (__mmask8) -1);
5632}
5633
5634extern __inline __mmask8
5635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5636_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5637{
5638  return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5639					       (__v8si) __B, __U);
5640}
5641
5642extern __inline __mmask8
5643__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5644_mm_test_epi64_mask (__m128i __A, __m128i __B)
5645{
5646  return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5647					       (__v2di) __B,
5648					       (__mmask8) -1);
5649}
5650
5651extern __inline __mmask8
5652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5653_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5654{
5655  return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5656					       (__v2di) __B, __U);
5657}
5658
5659extern __inline __mmask8
5660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5661_mm256_test_epi64_mask (__m256i __A, __m256i __B)
5662{
5663  return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5664					       (__v4di) __B,
5665					       (__mmask8) -1);
5666}
5667
5668extern __inline __mmask8
5669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5670_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5671{
5672  return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5673					       (__v4di) __B, __U);
5674}
5675
5676extern __inline __mmask8
5677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5678_mm_testn_epi32_mask (__m128i __A, __m128i __B)
5679{
5680  return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5681						(__v4si) __B,
5682						(__mmask8) -1);
5683}
5684
5685extern __inline __mmask8
5686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5687_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5688{
5689  return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5690						(__v4si) __B, __U);
5691}
5692
5693extern __inline __mmask8
5694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5695_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
5696{
5697  return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5698						(__v8si) __B,
5699						(__mmask8) -1);
5700}
5701
5702extern __inline __mmask8
5703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5704_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5705{
5706  return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5707						(__v8si) __B, __U);
5708}
5709
5710extern __inline __mmask8
5711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5712_mm_testn_epi64_mask (__m128i __A, __m128i __B)
5713{
5714  return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5715						(__v2di) __B,
5716						(__mmask8) -1);
5717}
5718
5719extern __inline __mmask8
5720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5721_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5722{
5723  return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5724						(__v2di) __B, __U);
5725}
5726
5727extern __inline __mmask8
5728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5729_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
5730{
5731  return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5732						(__v4di) __B,
5733						(__mmask8) -1);
5734}
5735
5736extern __inline __mmask8
5737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5738_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5739{
5740  return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5741						(__v4di) __B, __U);
5742}
5743
5744extern __inline __m256d
5745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5746_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
5747{
5748  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5749						      (__v4df) __W,
5750						      (__mmask8) __U);
5751}
5752
5753extern __inline __m256d
5754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5755_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
5756{
5757  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5758						      (__v4df)
5759						      _mm256_setzero_pd (),
5760						      (__mmask8) __U);
5761}
5762
5763extern __inline void
5764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5765_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
5766{
5767  __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
5768					  (__v4df) __A,
5769					  (__mmask8) __U);
5770}
5771
5772extern __inline __m128d
5773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5774_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
5775{
5776  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5777						      (__v2df) __W,
5778						      (__mmask8) __U);
5779}
5780
5781extern __inline __m128d
5782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5783_mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
5784{
5785  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5786						      (__v2df)
5787						      _mm_setzero_pd (),
5788						      (__mmask8) __U);
5789}
5790
5791extern __inline void
5792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5793_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
5794{
5795  __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
5796					  (__v2df) __A,
5797					  (__mmask8) __U);
5798}
5799
5800extern __inline __m256
5801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5802_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
5803{
5804  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5805						     (__v8sf) __W,
5806						     (__mmask8) __U);
5807}
5808
5809extern __inline __m256
5810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5811_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
5812{
5813  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5814						     (__v8sf)
5815						     _mm256_setzero_ps (),
5816						     (__mmask8) __U);
5817}
5818
5819extern __inline void
5820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5821_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
5822{
5823  __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
5824					  (__v8sf) __A,
5825					  (__mmask8) __U);
5826}
5827
5828extern __inline __m128
5829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5830_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
5831{
5832  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5833						     (__v4sf) __W,
5834						     (__mmask8) __U);
5835}
5836
5837extern __inline __m128
5838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5839_mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
5840{
5841  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5842						     (__v4sf)
5843						     _mm_setzero_ps (),
5844						     (__mmask8) __U);
5845}
5846
5847extern __inline void
5848__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5849_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
5850{
5851  __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
5852					  (__v4sf) __A,
5853					  (__mmask8) __U);
5854}
5855
5856extern __inline __m256i
5857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5858_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5859{
5860  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5861						      (__v4di) __W,
5862						      (__mmask8) __U);
5863}
5864
5865extern __inline __m256i
5866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5867_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
5868{
5869  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5870						      (__v4di)
5871						      _mm256_setzero_si256 (),
5872						      (__mmask8) __U);
5873}
5874
5875extern __inline void
5876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5877_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5878{
5879  __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
5880					  (__v4di) __A,
5881					  (__mmask8) __U);
5882}
5883
5884extern __inline __m128i
5885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5886_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5887{
5888  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5889						      (__v2di) __W,
5890						      (__mmask8) __U);
5891}
5892
5893extern __inline __m128i
5894__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5895_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
5896{
5897  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5898						      (__v2di)
5899						      _mm_setzero_di (),
5900						      (__mmask8) __U);
5901}
5902
5903extern __inline void
5904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5905_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5906{
5907  __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
5908					  (__v2di) __A,
5909					  (__mmask8) __U);
5910}
5911
5912extern __inline __m256i
5913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5914_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5915{
5916  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5917						      (__v8si) __W,
5918						      (__mmask8) __U);
5919}
5920
5921extern __inline __m256i
5922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5923_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
5924{
5925  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5926						      (__v8si)
5927						      _mm256_setzero_si256 (),
5928						      (__mmask8) __U);
5929}
5930
5931extern __inline void
5932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5933_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5934{
5935  __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
5936					  (__v8si) __A,
5937					  (__mmask8) __U);
5938}
5939
5940extern __inline __m128i
5941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5942_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5943{
5944  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5945						      (__v4si) __W,
5946						      (__mmask8) __U);
5947}
5948
5949extern __inline __m128i
5950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5951_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
5952{
5953  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5954						      (__v4si)
5955						      _mm_setzero_si128 (),
5956						      (__mmask8) __U);
5957}
5958
5959extern __inline void
5960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5961_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5962{
5963  __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
5964					  (__v4si) __A,
5965					  (__mmask8) __U);
5966}
5967
5968extern __inline __m256d
5969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5970_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
5971{
5972  return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
5973						    (__v4df) __W,
5974						    (__mmask8) __U);
5975}
5976
5977extern __inline __m256d
5978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5979_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
5980{
5981  return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
5982						     (__v4df)
5983						     _mm256_setzero_pd (),
5984						     (__mmask8) __U);
5985}
5986
5987extern __inline __m256d
5988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5989_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5990{
5991  return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
5992							(__v4df) __W,
5993							(__mmask8)
5994							__U);
5995}
5996
5997extern __inline __m256d
5998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5999_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6000{
6001  return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
6002							 (__v4df)
6003							 _mm256_setzero_pd (),
6004							 (__mmask8)
6005							 __U);
6006}
6007
6008extern __inline __m128d
6009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6010_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
6011{
6012  return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
6013						    (__v2df) __W,
6014						    (__mmask8) __U);
6015}
6016
6017extern __inline __m128d
6018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6019_mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
6020{
6021  return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
6022						     (__v2df)
6023						     _mm_setzero_pd (),
6024						     (__mmask8) __U);
6025}
6026
6027extern __inline __m128d
6028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6029_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
6030{
6031  return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
6032							(__v2df) __W,
6033							(__mmask8)
6034							__U);
6035}
6036
6037extern __inline __m128d
6038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6039_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6040{
6041  return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
6042							 (__v2df)
6043							 _mm_setzero_pd (),
6044							 (__mmask8)
6045							 __U);
6046}
6047
6048extern __inline __m256
6049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6050_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
6051{
6052  return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
6053						   (__v8sf) __W,
6054						   (__mmask8) __U);
6055}
6056
6057extern __inline __m256
6058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6059_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
6060{
6061  return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
6062						    (__v8sf)
6063						    _mm256_setzero_ps (),
6064						    (__mmask8) __U);
6065}
6066
6067extern __inline __m256
6068__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6069_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
6070{
6071  return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
6072						       (__v8sf) __W,
6073						       (__mmask8) __U);
6074}
6075
6076extern __inline __m256
6077__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6078_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6079{
6080  return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
6081							(__v8sf)
6082							_mm256_setzero_ps (),
6083							(__mmask8)
6084							__U);
6085}
6086
6087extern __inline __m128
6088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6089_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
6090{
6091  return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
6092						   (__v4sf) __W,
6093						   (__mmask8) __U);
6094}
6095
6096extern __inline __m128
6097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6098_mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
6099{
6100  return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
6101						    (__v4sf)
6102						    _mm_setzero_ps (),
6103						    (__mmask8) __U);
6104}
6105
6106extern __inline __m128
6107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6108_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6109{
6110  return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
6111						       (__v4sf) __W,
6112						       (__mmask8) __U);
6113}
6114
6115extern __inline __m128
6116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6117_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6118{
6119  return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
6120							(__v4sf)
6121							_mm_setzero_ps (),
6122							(__mmask8)
6123							__U);
6124}
6125
6126extern __inline __m256i
6127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6128_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
6129{
6130  return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
6131						    (__v4di) __W,
6132						    (__mmask8) __U);
6133}
6134
6135extern __inline __m256i
6136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6137_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
6138{
6139  return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
6140						     (__v4di)
6141						     _mm256_setzero_si256 (),
6142						     (__mmask8) __U);
6143}
6144
6145extern __inline __m256i
6146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6147_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
6148			       void const *__P)
6149{
6150  return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
6151							(__v4di) __W,
6152							(__mmask8)
6153							__U);
6154}
6155
6156extern __inline __m256i
6157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6158_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6159{
6160  return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
6161							 (__v4di)
6162							 _mm256_setzero_si256 (),
6163							 (__mmask8)
6164							 __U);
6165}
6166
6167extern __inline __m128i
6168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6169_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
6170{
6171  return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
6172						    (__v2di) __W,
6173						    (__mmask8) __U);
6174}
6175
6176extern __inline __m128i
6177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6178_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
6179{
6180  return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
6181						     (__v2di)
6182						     _mm_setzero_si128 (),
6183						     (__mmask8) __U);
6184}
6185
6186extern __inline __m128i
6187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6188_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
6189{
6190  return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
6191							(__v2di) __W,
6192							(__mmask8)
6193							__U);
6194}
6195
6196extern __inline __m128i
6197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6198_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6199{
6200  return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
6201							 (__v2di)
6202							 _mm_setzero_si128 (),
6203							 (__mmask8)
6204							 __U);
6205}
6206
6207extern __inline __m256i
6208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6209_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
6210{
6211  return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
6212						    (__v8si) __W,
6213						    (__mmask8) __U);
6214}
6215
6216extern __inline __m256i
6217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6218_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
6219{
6220  return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
6221						     (__v8si)
6222						     _mm256_setzero_si256 (),
6223						     (__mmask8) __U);
6224}
6225
6226extern __inline __m256i
6227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6228_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
6229			       void const *__P)
6230{
6231  return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
6232							(__v8si) __W,
6233							(__mmask8)
6234							__U);
6235}
6236
6237extern __inline __m256i
6238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6239_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6240{
6241  return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
6242							 (__v8si)
6243							 _mm256_setzero_si256 (),
6244							 (__mmask8)
6245							 __U);
6246}
6247
6248extern __inline __m128i
6249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6250_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6251{
6252  return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
6253						    (__v4si) __W,
6254						    (__mmask8) __U);
6255}
6256
6257extern __inline __m128i
6258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6259_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
6260{
6261  return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
6262						     (__v4si)
6263						     _mm_setzero_si128 (),
6264						     (__mmask8) __U);
6265}
6266
6267extern __inline __m128i
6268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6269_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
6270{
6271  return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
6272							(__v4si) __W,
6273							(__mmask8)
6274							__U);
6275}
6276
6277extern __inline __m128i
6278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6279_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6280{
6281  return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
6282							 (__v4si)
6283							 _mm_setzero_si128 (),
6284							 (__mmask8)
6285							 __U);
6286}
6287
6288extern __inline __m256d
6289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6290_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
6291{
6292  return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6293							/* idx */ ,
6294							(__v4df) __A,
6295							(__v4df) __B,
6296							(__mmask8) -
6297							1);
6298}
6299
6300extern __inline __m256d
6301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6302_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
6303			     __m256d __B)
6304{
6305  return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6306							/* idx */ ,
6307							(__v4df) __A,
6308							(__v4df) __B,
6309							(__mmask8)
6310							__U);
6311}
6312
6313extern __inline __m256d
6314__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6315_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
6316			      __m256d __B)
6317{
6318  return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
6319							(__v4di) __I
6320							/* idx */ ,
6321							(__v4df) __B,
6322							(__mmask8)
6323							__U);
6324}
6325
6326extern __inline __m256d
6327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6328_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
6329			      __m256d __B)
6330{
6331  return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
6332							 /* idx */ ,
6333							 (__v4df) __A,
6334							 (__v4df) __B,
6335							 (__mmask8)
6336							 __U);
6337}
6338
6339extern __inline __m256
6340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6341_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
6342{
6343  return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6344						       /* idx */ ,
6345						       (__v8sf) __A,
6346						       (__v8sf) __B,
6347						       (__mmask8) -1);
6348}
6349
6350extern __inline __m256
6351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6352_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
6353			     __m256 __B)
6354{
6355  return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6356						       /* idx */ ,
6357						       (__v8sf) __A,
6358						       (__v8sf) __B,
6359						       (__mmask8) __U);
6360}
6361
6362extern __inline __m256
6363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6364_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
6365			      __m256 __B)
6366{
6367  return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
6368						       (__v8si) __I
6369						       /* idx */ ,
6370						       (__v8sf) __B,
6371						       (__mmask8) __U);
6372}
6373
6374extern __inline __m256
6375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6376_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
6377			      __m256 __B)
6378{
6379  return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
6380							/* idx */ ,
6381							(__v8sf) __A,
6382							(__v8sf) __B,
6383							(__mmask8)
6384							__U);
6385}
6386
6387extern __inline __m128i
6388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6389_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
6390{
6391  return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6392						       /* idx */ ,
6393						       (__v2di) __A,
6394						       (__v2di) __B,
6395						       (__mmask8) -1);
6396}
6397
6398extern __inline __m128i
6399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6400_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
6401			     __m128i __B)
6402{
6403  return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6404						       /* idx */ ,
6405						       (__v2di) __A,
6406						       (__v2di) __B,
6407						       (__mmask8) __U);
6408}
6409
6410extern __inline __m128i
6411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6412_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
6413			      __m128i __B)
6414{
6415  return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
6416						       (__v2di) __I
6417						       /* idx */ ,
6418						       (__v2di) __B,
6419						       (__mmask8) __U);
6420}
6421
6422extern __inline __m128i
6423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6424_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
6425			      __m128i __B)
6426{
6427  return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
6428							/* idx */ ,
6429							(__v2di) __A,
6430							(__v2di) __B,
6431							(__mmask8)
6432							__U);
6433}
6434
6435extern __inline __m128i
6436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6437_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
6438{
6439  return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6440						       /* idx */ ,
6441						       (__v4si) __A,
6442						       (__v4si) __B,
6443						       (__mmask8) -1);
6444}
6445
6446extern __inline __m128i
6447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6448_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
6449			     __m128i __B)
6450{
6451  return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6452						       /* idx */ ,
6453						       (__v4si) __A,
6454						       (__v4si) __B,
6455						       (__mmask8) __U);
6456}
6457
6458extern __inline __m128i
6459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6460_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
6461			      __m128i __B)
6462{
6463  return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
6464						       (__v4si) __I
6465						       /* idx */ ,
6466						       (__v4si) __B,
6467						       (__mmask8) __U);
6468}
6469
6470extern __inline __m128i
6471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6472_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
6473			      __m128i __B)
6474{
6475  return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
6476							/* idx */ ,
6477							(__v4si) __A,
6478							(__v4si) __B,
6479							(__mmask8)
6480							__U);
6481}
6482
6483extern __inline __m256i
6484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6485_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
6486{
6487  return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6488						       /* idx */ ,
6489						       (__v4di) __A,
6490						       (__v4di) __B,
6491						       (__mmask8) -1);
6492}
6493
6494extern __inline __m256i
6495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6496_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
6497				__m256i __B)
6498{
6499  return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6500						       /* idx */ ,
6501						       (__v4di) __A,
6502						       (__v4di) __B,
6503						       (__mmask8) __U);
6504}
6505
6506extern __inline __m256i
6507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6508_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
6509				 __mmask8 __U, __m256i __B)
6510{
6511  return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
6512						       (__v4di) __I
6513						       /* idx */ ,
6514						       (__v4di) __B,
6515						       (__mmask8) __U);
6516}
6517
6518extern __inline __m256i
6519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6520_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
6521				 __m256i __I, __m256i __B)
6522{
6523  return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
6524							/* idx */ ,
6525							(__v4di) __A,
6526							(__v4di) __B,
6527							(__mmask8)
6528							__U);
6529}
6530
6531extern __inline __m256i
6532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6533_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
6534{
6535  return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6536						       /* idx */ ,
6537						       (__v8si) __A,
6538						       (__v8si) __B,
6539						       (__mmask8) -1);
6540}
6541
6542extern __inline __m256i
6543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6544_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
6545				__m256i __B)
6546{
6547  return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6548						       /* idx */ ,
6549						       (__v8si) __A,
6550						       (__v8si) __B,
6551						       (__mmask8) __U);
6552}
6553
6554extern __inline __m256i
6555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6556_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
6557				 __mmask8 __U, __m256i __B)
6558{
6559  return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
6560						       (__v8si) __I
6561						       /* idx */ ,
6562						       (__v8si) __B,
6563						       (__mmask8) __U);
6564}
6565
6566extern __inline __m256i
6567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6568_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
6569				 __m256i __I, __m256i __B)
6570{
6571  return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
6572							/* idx */ ,
6573							(__v8si) __A,
6574							(__v8si) __B,
6575							(__mmask8)
6576							__U);
6577}
6578
6579extern __inline __m128d
6580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6581_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
6582{
6583  return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6584							/* idx */ ,
6585							(__v2df) __A,
6586							(__v2df) __B,
6587							(__mmask8) -
6588							1);
6589}
6590
6591extern __inline __m128d
6592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6593_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
6594			  __m128d __B)
6595{
6596  return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6597							/* idx */ ,
6598							(__v2df) __A,
6599							(__v2df) __B,
6600							(__mmask8)
6601							__U);
6602}
6603
6604extern __inline __m128d
6605__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6606_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
6607			   __m128d __B)
6608{
6609  return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
6610							(__v2di) __I
6611							/* idx */ ,
6612							(__v2df) __B,
6613							(__mmask8)
6614							__U);
6615}
6616
6617extern __inline __m128d
6618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6619_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
6620			   __m128d __B)
6621{
6622  return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
6623							 /* idx */ ,
6624							 (__v2df) __A,
6625							 (__v2df) __B,
6626							 (__mmask8)
6627							 __U);
6628}
6629
6630extern __inline __m128
6631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6632_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
6633{
6634  return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6635						       /* idx */ ,
6636						       (__v4sf) __A,
6637						       (__v4sf) __B,
6638						       (__mmask8) -1);
6639}
6640
6641extern __inline __m128
6642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6643_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
6644			  __m128 __B)
6645{
6646  return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6647						       /* idx */ ,
6648						       (__v4sf) __A,
6649						       (__v4sf) __B,
6650						       (__mmask8) __U);
6651}
6652
6653extern __inline __m128
6654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6655_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
6656			   __m128 __B)
6657{
6658  return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
6659						       (__v4si) __I
6660						       /* idx */ ,
6661						       (__v4sf) __B,
6662						       (__mmask8) __U);
6663}
6664
6665extern __inline __m128
6666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6667_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
6668			   __m128 __B)
6669{
6670  return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
6671							/* idx */ ,
6672							(__v4sf) __A,
6673							(__v4sf) __B,
6674							(__mmask8)
6675							__U);
6676}
6677
6678extern __inline __m128i
6679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6680_mm_srav_epi64 (__m128i __X, __m128i __Y)
6681{
6682  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6683						  (__v2di) __Y,
6684						  (__v2di)
6685						  _mm_setzero_di (),
6686						  (__mmask8) -1);
6687}
6688
6689extern __inline __m128i
6690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6691_mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6692		     __m128i __Y)
6693{
6694  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6695						  (__v2di) __Y,
6696						  (__v2di) __W,
6697						  (__mmask8) __U);
6698}
6699
6700extern __inline __m128i
6701__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6702_mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6703{
6704  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6705						  (__v2di) __Y,
6706						  (__v2di)
6707						  _mm_setzero_di (),
6708						  (__mmask8) __U);
6709}
6710
6711extern __inline __m256i
6712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6713_mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6714			__m256i __Y)
6715{
6716  return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6717						 (__v8si) __Y,
6718						 (__v8si) __W,
6719						 (__mmask8) __U);
6720}
6721
6722extern __inline __m256i
6723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6724_mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6725{
6726  return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6727						 (__v8si) __Y,
6728						 (__v8si)
6729						 _mm256_setzero_si256 (),
6730						 (__mmask8) __U);
6731}
6732
6733extern __inline __m128i
6734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6735_mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6736		     __m128i __Y)
6737{
6738  return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6739						 (__v4si) __Y,
6740						 (__v4si) __W,
6741						 (__mmask8) __U);
6742}
6743
6744extern __inline __m128i
6745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6746_mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6747{
6748  return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6749						 (__v4si) __Y,
6750						 (__v4si)
6751						 _mm_setzero_si128 (),
6752						 (__mmask8) __U);
6753}
6754
6755extern __inline __m256i
6756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6757_mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6758			__m256i __Y)
6759{
6760  return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6761						 (__v4di) __Y,
6762						 (__v4di) __W,
6763						 (__mmask8) __U);
6764}
6765
6766extern __inline __m256i
6767__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6768_mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6769{
6770  return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6771						 (__v4di) __Y,
6772						 (__v4di)
6773						 _mm256_setzero_si256 (),
6774						 (__mmask8) __U);
6775}
6776
6777extern __inline __m128i
6778__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6779_mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6780		     __m128i __Y)
6781{
6782  return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6783						 (__v2di) __Y,
6784						 (__v2di) __W,
6785						 (__mmask8) __U);
6786}
6787
6788extern __inline __m128i
6789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6790_mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6791{
6792  return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6793						 (__v2di) __Y,
6794						 (__v2di)
6795						 _mm_setzero_di (),
6796						 (__mmask8) __U);
6797}
6798
6799extern __inline __m256i
6800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6801_mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6802			__m256i __Y)
6803{
6804  return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6805						 (__v8si) __Y,
6806						 (__v8si) __W,
6807						 (__mmask8) __U);
6808}
6809
6810extern __inline __m256i
6811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6812_mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6813{
6814  return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6815						 (__v8si) __Y,
6816						 (__v8si)
6817						 _mm256_setzero_si256 (),
6818						 (__mmask8) __U);
6819}
6820
6821extern __inline __m128i
6822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6823_mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6824		     __m128i __Y)
6825{
6826  return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6827						 (__v4si) __Y,
6828						 (__v4si) __W,
6829						 (__mmask8) __U);
6830}
6831
6832extern __inline __m128i
6833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6834_mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6835{
6836  return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6837						 (__v4si) __Y,
6838						 (__v4si)
6839						 _mm_setzero_si128 (),
6840						 (__mmask8) __U);
6841}
6842
6843extern __inline __m256i
6844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6845_mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6846			__m256i __Y)
6847{
6848  return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6849						 (__v8si) __Y,
6850						 (__v8si) __W,
6851						 (__mmask8) __U);
6852}
6853
6854extern __inline __m256i
6855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6856_mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6857{
6858  return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6859						 (__v8si) __Y,
6860						 (__v8si)
6861						 _mm256_setzero_si256 (),
6862						 (__mmask8) __U);
6863}
6864
6865extern __inline __m128i
6866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6867_mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6868		     __m128i __Y)
6869{
6870  return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6871						 (__v4si) __Y,
6872						 (__v4si) __W,
6873						 (__mmask8) __U);
6874}
6875
6876extern __inline __m128i
6877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6878_mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6879{
6880  return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6881						 (__v4si) __Y,
6882						 (__v4si)
6883						 _mm_setzero_si128 (),
6884						 (__mmask8) __U);
6885}
6886
6887extern __inline __m256i
6888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6889_mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6890			__m256i __Y)
6891{
6892  return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6893						 (__v4di) __Y,
6894						 (__v4di) __W,
6895						 (__mmask8) __U);
6896}
6897
6898extern __inline __m256i
6899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900_mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6901{
6902  return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6903						 (__v4di) __Y,
6904						 (__v4di)
6905						 _mm256_setzero_si256 (),
6906						 (__mmask8) __U);
6907}
6908
6909extern __inline __m128i
6910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6911_mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6912		     __m128i __Y)
6913{
6914  return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6915						 (__v2di) __Y,
6916						 (__v2di) __W,
6917						 (__mmask8) __U);
6918}
6919
6920extern __inline __m128i
6921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6922_mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6923{
6924  return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6925						 (__v2di) __Y,
6926						 (__v2di)
6927						 _mm_setzero_di (),
6928						 (__mmask8) __U);
6929}
6930
6931extern __inline __m256i
6932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6933_mm256_rolv_epi32 (__m256i __A, __m256i __B)
6934{
6935  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6936						  (__v8si) __B,
6937						  (__v8si)
6938						  _mm256_setzero_si256 (),
6939						  (__mmask8) -1);
6940}
6941
6942extern __inline __m256i
6943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6944_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
6945			__m256i __B)
6946{
6947  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6948						  (__v8si) __B,
6949						  (__v8si) __W,
6950						  (__mmask8) __U);
6951}
6952
6953extern __inline __m256i
6954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6955_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
6956{
6957  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6958						  (__v8si) __B,
6959						  (__v8si)
6960						  _mm256_setzero_si256 (),
6961						  (__mmask8) __U);
6962}
6963
6964extern __inline __m128i
6965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6966_mm_rolv_epi32 (__m128i __A, __m128i __B)
6967{
6968  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6969						  (__v4si) __B,
6970						  (__v4si)
6971						  _mm_setzero_si128 (),
6972						  (__mmask8) -1);
6973}
6974
6975extern __inline __m128i
6976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6977_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
6978		     __m128i __B)
6979{
6980  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6981						  (__v4si) __B,
6982						  (__v4si) __W,
6983						  (__mmask8) __U);
6984}
6985
6986extern __inline __m128i
6987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6988_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
6989{
6990  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6991						  (__v4si) __B,
6992						  (__v4si)
6993						  _mm_setzero_si128 (),
6994						  (__mmask8) __U);
6995}
6996
6997extern __inline __m256i
6998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6999_mm256_rorv_epi32 (__m256i __A, __m256i __B)
7000{
7001  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7002						  (__v8si) __B,
7003						  (__v8si)
7004						  _mm256_setzero_si256 (),
7005						  (__mmask8) -1);
7006}
7007
7008extern __inline __m256i
7009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7010_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7011			__m256i __B)
7012{
7013  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7014						  (__v8si) __B,
7015						  (__v8si) __W,
7016						  (__mmask8) __U);
7017}
7018
7019extern __inline __m256i
7020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7021_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
7022{
7023  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7024						  (__v8si) __B,
7025						  (__v8si)
7026						  _mm256_setzero_si256 (),
7027						  (__mmask8) __U);
7028}
7029
7030extern __inline __m128i
7031__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7032_mm_rorv_epi32 (__m128i __A, __m128i __B)
7033{
7034  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7035						  (__v4si) __B,
7036						  (__v4si)
7037						  _mm_setzero_si128 (),
7038						  (__mmask8) -1);
7039}
7040
7041extern __inline __m128i
7042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7043_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7044		     __m128i __B)
7045{
7046  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7047						  (__v4si) __B,
7048						  (__v4si) __W,
7049						  (__mmask8) __U);
7050}
7051
7052extern __inline __m128i
7053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7054_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7055{
7056  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7057						  (__v4si) __B,
7058						  (__v4si)
7059						  _mm_setzero_si128 (),
7060						  (__mmask8) __U);
7061}
7062
7063extern __inline __m256i
7064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7065_mm256_rolv_epi64 (__m256i __A, __m256i __B)
7066{
7067  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7068						  (__v4di) __B,
7069						  (__v4di)
7070						  _mm256_setzero_si256 (),
7071						  (__mmask8) -1);
7072}
7073
7074extern __inline __m256i
7075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7076_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7077			__m256i __B)
7078{
7079  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7080						  (__v4di) __B,
7081						  (__v4di) __W,
7082						  (__mmask8) __U);
7083}
7084
7085extern __inline __m256i
7086__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7087_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7088{
7089  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7090						  (__v4di) __B,
7091						  (__v4di)
7092						  _mm256_setzero_si256 (),
7093						  (__mmask8) __U);
7094}
7095
7096extern __inline __m128i
7097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7098_mm_rolv_epi64 (__m128i __A, __m128i __B)
7099{
7100  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7101						  (__v2di) __B,
7102						  (__v2di)
7103						  _mm_setzero_di (),
7104						  (__mmask8) -1);
7105}
7106
7107extern __inline __m128i
7108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7109_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7110		     __m128i __B)
7111{
7112  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7113						  (__v2di) __B,
7114						  (__v2di) __W,
7115						  (__mmask8) __U);
7116}
7117
7118extern __inline __m128i
7119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7120_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7121{
7122  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7123						  (__v2di) __B,
7124						  (__v2di)
7125						  _mm_setzero_di (),
7126						  (__mmask8) __U);
7127}
7128
7129extern __inline __m256i
7130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7131_mm256_rorv_epi64 (__m256i __A, __m256i __B)
7132{
7133  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7134						  (__v4di) __B,
7135						  (__v4di)
7136						  _mm256_setzero_si256 (),
7137						  (__mmask8) -1);
7138}
7139
7140extern __inline __m256i
7141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7142_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7143			__m256i __B)
7144{
7145  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7146						  (__v4di) __B,
7147						  (__v4di) __W,
7148						  (__mmask8) __U);
7149}
7150
7151extern __inline __m256i
7152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7153_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7154{
7155  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7156						  (__v4di) __B,
7157						  (__v4di)
7158						  _mm256_setzero_si256 (),
7159						  (__mmask8) __U);
7160}
7161
7162extern __inline __m128i
7163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7164_mm_rorv_epi64 (__m128i __A, __m128i __B)
7165{
7166  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7167						  (__v2di) __B,
7168						  (__v2di)
7169						  _mm_setzero_di (),
7170						  (__mmask8) -1);
7171}
7172
7173extern __inline __m128i
7174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7175_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7176		     __m128i __B)
7177{
7178  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7179						  (__v2di) __B,
7180						  (__v2di) __W,
7181						  (__mmask8) __U);
7182}
7183
7184extern __inline __m128i
7185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7186_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7187{
7188  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7189						  (__v2di) __B,
7190						  (__v2di)
7191						  _mm_setzero_di (),
7192						  (__mmask8) __U);
7193}
7194
7195extern __inline __m256i
7196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7197_mm256_srav_epi64 (__m256i __X, __m256i __Y)
7198{
7199  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7200						  (__v4di) __Y,
7201						  (__v4di)
7202						  _mm256_setzero_si256 (),
7203						  (__mmask8) -1);
7204}
7205
7206extern __inline __m256i
7207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7208_mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
7209			__m256i __Y)
7210{
7211  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7212						  (__v4di) __Y,
7213						  (__v4di) __W,
7214						  (__mmask8) __U);
7215}
7216
7217extern __inline __m256i
7218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7219_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
7220{
7221  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7222						  (__v4di) __Y,
7223						  (__v4di)
7224						  _mm256_setzero_si256 (),
7225						  (__mmask8) __U);
7226}
7227
7228extern __inline __m256i
7229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7230_mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7231		       __m256i __B)
7232{
7233  return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7234						 (__v4di) __B,
7235						 (__v4di) __W, __U);
7236}
7237
7238extern __inline __m256i
7239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7240_mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7241{
7242  return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7243						 (__v4di) __B,
7244						 (__v4di)
7245						 _mm256_setzero_pd (),
7246						 __U);
7247}
7248
7249extern __inline __m128i
7250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7251_mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7252		    __m128i __B)
7253{
7254  return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7255						 (__v2di) __B,
7256						 (__v2di) __W, __U);
7257}
7258
7259extern __inline __m128i
7260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7261_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7262{
7263  return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7264						 (__v2di) __B,
7265						 (__v2di)
7266						 _mm_setzero_pd (),
7267						 __U);
7268}
7269
7270extern __inline __m256i
7271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7272_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7273			  __m256i __B)
7274{
7275  return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7276						  (__v4di) __B,
7277						  (__v4di) __W, __U);
7278}
7279
7280extern __inline __m256i
7281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7282_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7283{
7284  return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7285						  (__v4di) __B,
7286						  (__v4di)
7287						  _mm256_setzero_pd (),
7288						  __U);
7289}
7290
7291extern __inline __m128i
7292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7293_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7294		       __m128i __B)
7295{
7296  return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7297						  (__v2di) __B,
7298						  (__v2di) __W, __U);
7299}
7300
7301extern __inline __m128i
7302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7303_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7304{
7305  return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7306						  (__v2di) __B,
7307						  (__v2di)
7308						  _mm_setzero_pd (),
7309						  __U);
7310}
7311
7312extern __inline __m256i
7313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7314_mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7315		      __m256i __B)
7316{
7317  return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7318						(__v4di) __B,
7319						(__v4di) __W,
7320						(__mmask8) __U);
7321}
7322
7323extern __inline __m256i
7324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7325_mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7326{
7327  return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7328						(__v4di) __B,
7329						(__v4di)
7330						_mm256_setzero_si256 (),
7331						(__mmask8) __U);
7332}
7333
7334extern __inline __m128i
7335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7336_mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
7337{
7338  return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7339						(__v2di) __B,
7340						(__v2di) __W,
7341						(__mmask8) __U);
7342}
7343
7344extern __inline __m128i
7345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7346_mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7347{
7348  return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7349						(__v2di) __B,
7350						(__v2di)
7351						_mm_setzero_si128 (),
7352						(__mmask8) __U);
7353}
7354
7355extern __inline __m256i
7356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7357_mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7358		       __m256i __B)
7359{
7360  return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7361						 (__v4di) __B,
7362						 (__v4di) __W,
7363						 (__mmask8) __U);
7364}
7365
7366extern __inline __m256i
7367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7368_mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7369{
7370  return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7371						 (__v4di) __B,
7372						 (__v4di)
7373						 _mm256_setzero_si256 (),
7374						 (__mmask8) __U);
7375}
7376
7377extern __inline __m128i
7378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7379_mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7380		    __m128i __B)
7381{
7382  return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7383						 (__v2di) __B,
7384						 (__v2di) __W,
7385						 (__mmask8) __U);
7386}
7387
7388extern __inline __m128i
7389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7390_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7391{
7392  return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7393						 (__v2di) __B,
7394						 (__v2di)
7395						 _mm_setzero_si128 (),
7396						 (__mmask8) __U);
7397}
7398
7399extern __inline __m256d
7400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7401_mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
7402		    __m256d __B)
7403{
7404  return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7405						 (__v4df) __B,
7406						 (__v4df) __W,
7407						 (__mmask8) __U);
7408}
7409
7410extern __inline __m256d
7411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7412_mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
7413{
7414  return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7415						 (__v4df) __B,
7416						 (__v4df)
7417						 _mm256_setzero_pd (),
7418						 (__mmask8) __U);
7419}
7420
7421extern __inline __m256
7422__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7423_mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7424{
7425  return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7426						(__v8sf) __B,
7427						(__v8sf) __W,
7428						(__mmask8) __U);
7429}
7430
7431extern __inline __m256
7432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7433_mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
7434{
7435  return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7436						(__v8sf) __B,
7437						(__v8sf)
7438						_mm256_setzero_ps (),
7439						(__mmask8) __U);
7440}
7441
7442extern __inline __m128
7443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7444_mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7445{
7446  return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7447					     (__v4sf) __B,
7448					     (__v4sf) __W,
7449					     (__mmask8) __U);
7450}
7451
7452extern __inline __m128
7453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7454_mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
7455{
7456  return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7457					     (__v4sf) __B,
7458					     (__v4sf)
7459					     _mm_setzero_ps (),
7460					     (__mmask8) __U);
7461}
7462
7463extern __inline __m128d
7464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7465_mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7466{
7467  return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7468					      (__v2df) __B,
7469					      (__v2df) __W,
7470					      (__mmask8) __U);
7471}
7472
7473extern __inline __m128d
7474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7475_mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
7476{
7477  return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7478					      (__v2df) __B,
7479					      (__v2df)
7480					      _mm_setzero_pd (),
7481					      (__mmask8) __U);
7482}
7483
7484extern __inline __m256d
7485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7486_mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
7487		    __m256d __B)
7488{
7489  return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7490						 (__v4df) __B,
7491						 (__v4df) __W,
7492						 (__mmask8) __U);
7493}
7494
7495extern __inline __m256d
7496__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7497_mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
7498		    __m256d __B)
7499{
7500  return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7501						 (__v4df) __B,
7502						 (__v4df) __W,
7503						 (__mmask8) __U);
7504}
7505
7506extern __inline __m256d
7507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7508_mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
7509{
7510  return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7511						 (__v4df) __B,
7512						 (__v4df)
7513						 _mm256_setzero_pd (),
7514						 (__mmask8) __U);
7515}
7516
7517extern __inline __m256
7518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7519_mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7520{
7521  return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7522						(__v8sf) __B,
7523						(__v8sf) __W,
7524						(__mmask8) __U);
7525}
7526
7527extern __inline __m256d
7528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7529_mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
7530{
7531  return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7532						 (__v4df) __B,
7533						 (__v4df)
7534						 _mm256_setzero_pd (),
7535						 (__mmask8) __U);
7536}
7537
7538extern __inline __m256
7539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7540_mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7541{
7542  return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7543						(__v8sf) __B,
7544						(__v8sf) __W,
7545						(__mmask8) __U);
7546}
7547
7548extern __inline __m256
7549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7550_mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
7551{
7552  return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7553						(__v8sf) __B,
7554						(__v8sf)
7555						_mm256_setzero_ps (),
7556						(__mmask8) __U);
7557}
7558
7559extern __inline __m256
7560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7561_mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
7562{
7563  return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7564						(__v8sf) __B,
7565						(__v8sf)
7566						_mm256_setzero_ps (),
7567						(__mmask8) __U);
7568}
7569
7570extern __inline __m128
7571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7572_mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7573{
7574  return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7575					     (__v4sf) __B,
7576					     (__v4sf) __W,
7577					     (__mmask8) __U);
7578}
7579
7580extern __inline __m128
7581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7582_mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7583{
7584  return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7585					     (__v4sf) __B,
7586					     (__v4sf) __W,
7587					     (__mmask8) __U);
7588}
7589
7590extern __inline __m128
7591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7592_mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
7593{
7594  return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7595					     (__v4sf) __B,
7596					     (__v4sf)
7597					     _mm_setzero_ps (),
7598					     (__mmask8) __U);
7599}
7600
7601extern __inline __m128
7602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7603_mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
7604{
7605  return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7606					     (__v4sf) __B,
7607					     (__v4sf)
7608					     _mm_setzero_ps (),
7609					     (__mmask8) __U);
7610}
7611
7612extern __inline __m128
7613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7614_mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7615{
7616  return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7617					     (__v4sf) __B,
7618					     (__v4sf) __W,
7619					     (__mmask8) __U);
7620}
7621
7622extern __inline __m128
7623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7624_mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
7625{
7626  return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7627					     (__v4sf) __B,
7628					     (__v4sf)
7629					     _mm_setzero_ps (),
7630					     (__mmask8) __U);
7631}
7632
7633extern __inline __m128d
7634__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7635_mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7636{
7637  return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7638					      (__v2df) __B,
7639					      (__v2df) __W,
7640					      (__mmask8) __U);
7641}
7642
7643extern __inline __m128d
7644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7645_mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
7646{
7647  return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7648					      (__v2df) __B,
7649					      (__v2df)
7650					      _mm_setzero_pd (),
7651					      (__mmask8) __U);
7652}
7653
7654extern __inline __m128d
7655__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7656_mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7657{
7658  return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7659					      (__v2df) __B,
7660					      (__v2df) __W,
7661					      (__mmask8) __U);
7662}
7663
7664extern __inline __m128d
7665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7666_mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
7667{
7668  return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7669					      (__v2df) __B,
7670					      (__v2df)
7671					      _mm_setzero_pd (),
7672					      (__mmask8) __U);
7673}
7674
7675extern __inline __m128d
7676__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7677_mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7678{
7679  return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7680					      (__v2df) __B,
7681					      (__v2df) __W,
7682					      (__mmask8) __U);
7683}
7684
7685extern __inline __m128d
7686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7687_mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
7688{
7689  return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7690					      (__v2df) __B,
7691					      (__v2df)
7692					      _mm_setzero_pd (),
7693					      (__mmask8) __U);
7694}
7695
7696extern __inline __m256
7697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7698_mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7699{
7700  return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7701						(__v8sf) __B,
7702						(__v8sf) __W,
7703						(__mmask8) __U);
7704}
7705
7706extern __inline __m256
7707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7708_mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
7709{
7710  return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7711						(__v8sf) __B,
7712						(__v8sf)
7713						_mm256_setzero_ps (),
7714						(__mmask8) __U);
7715}
7716
7717extern __inline __m256d
7718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7719_mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
7720		    __m256d __B)
7721{
7722  return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7723						 (__v4df) __B,
7724						 (__v4df) __W,
7725						 (__mmask8) __U);
7726}
7727
7728extern __inline __m256d
7729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7730_mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
7731{
7732  return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7733						 (__v4df) __B,
7734						 (__v4df)
7735						 _mm256_setzero_pd (),
7736						 (__mmask8) __U);
7737}
7738
7739extern __inline __m256i
7740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7741_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7742{
7743  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7744						  (__v4di) __B,
7745						  (__v4di)
7746						  _mm256_setzero_si256 (),
7747						  __M);
7748}
7749
7750extern __inline __m256i
7751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7752_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7753		       __m256i __B)
7754{
7755  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7756						  (__v4di) __B,
7757						  (__v4di) __W, __M);
7758}
7759
7760extern __inline __m256i
7761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7762_mm256_min_epi64 (__m256i __A, __m256i __B)
7763{
7764  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7765						  (__v4di) __B,
7766						  (__v4di)
7767						  _mm256_setzero_si256 (),
7768						  (__mmask8) -1);
7769}
7770
7771extern __inline __m256i
7772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7773_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7774		       __m256i __B)
7775{
7776  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7777						  (__v4di) __B,
7778						  (__v4di) __W, __M);
7779}
7780
7781extern __inline __m256i
7782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7783_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7784{
7785  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7786						  (__v4di) __B,
7787						  (__v4di)
7788						  _mm256_setzero_si256 (),
7789						  __M);
7790}
7791
7792extern __inline __m256i
7793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7794_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7795{
7796  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7797						  (__v4di) __B,
7798						  (__v4di)
7799						  _mm256_setzero_si256 (),
7800						  __M);
7801}
7802
7803extern __inline __m256i
7804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7805_mm256_max_epi64 (__m256i __A, __m256i __B)
7806{
7807  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7808						  (__v4di) __B,
7809						  (__v4di)
7810						  _mm256_setzero_si256 (),
7811						  (__mmask8) -1);
7812}
7813
7814extern __inline __m256i
7815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7816_mm256_max_epu64 (__m256i __A, __m256i __B)
7817{
7818  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7819						  (__v4di) __B,
7820						  (__v4di)
7821						  _mm256_setzero_si256 (),
7822						  (__mmask8) -1);
7823}
7824
7825extern __inline __m256i
7826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7827_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7828		       __m256i __B)
7829{
7830  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7831						  (__v4di) __B,
7832						  (__v4di) __W, __M);
7833}
7834
7835extern __inline __m256i
7836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7837_mm256_min_epu64 (__m256i __A, __m256i __B)
7838{
7839  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7840						  (__v4di) __B,
7841						  (__v4di)
7842						  _mm256_setzero_si256 (),
7843						  (__mmask8) -1);
7844}
7845
7846extern __inline __m256i
7847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7848_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7849		       __m256i __B)
7850{
7851  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7852						  (__v4di) __B,
7853						  (__v4di) __W, __M);
7854}
7855
7856extern __inline __m256i
7857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7858_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7859{
7860  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7861						  (__v4di) __B,
7862						  (__v4di)
7863						  _mm256_setzero_si256 (),
7864						  __M);
7865}
7866
7867extern __inline __m256i
7868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7869_mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7870{
7871  return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7872						  (__v8si) __B,
7873						  (__v8si)
7874						  _mm256_setzero_si256 (),
7875						  __M);
7876}
7877
7878extern __inline __m256i
7879__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7880_mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7881		       __m256i __B)
7882{
7883  return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7884						  (__v8si) __B,
7885						  (__v8si) __W, __M);
7886}
7887
7888extern __inline __m256i
7889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7890_mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7891{
7892  return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7893						  (__v8si) __B,
7894						  (__v8si)
7895						  _mm256_setzero_si256 (),
7896						  __M);
7897}
7898
7899extern __inline __m256i
7900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7901_mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7902		       __m256i __B)
7903{
7904  return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7905						  (__v8si) __B,
7906						  (__v8si) __W, __M);
7907}
7908
7909extern __inline __m256i
7910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7911_mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7912{
7913  return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7914						  (__v8si) __B,
7915						  (__v8si)
7916						  _mm256_setzero_si256 (),
7917						  __M);
7918}
7919
7920extern __inline __m256i
7921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7922_mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7923		       __m256i __B)
7924{
7925  return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7926						  (__v8si) __B,
7927						  (__v8si) __W, __M);
7928}
7929
7930extern __inline __m256i
7931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7932_mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7933{
7934  return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7935						  (__v8si) __B,
7936						  (__v8si)
7937						  _mm256_setzero_si256 (),
7938						  __M);
7939}
7940
7941extern __inline __m256i
7942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7943_mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7944		       __m256i __B)
7945{
7946  return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7947						  (__v8si) __B,
7948						  (__v8si) __W, __M);
7949}
7950
7951extern __inline __m128i
7952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7953_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
7954{
7955  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
7956						  (__v2di) __B,
7957						  (__v2di)
7958						  _mm_setzero_si128 (),
7959						  __M);
7960}
7961
7962extern __inline __m128i
7963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7964_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
7965		    __m128i __B)
7966{
7967  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
7968						  (__v2di) __B,
7969						  (__v2di) __W, __M);
7970}
7971
7972extern __inline __m128i
7973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7974_mm_min_epi64 (__m128i __A, __m128i __B)
7975{
7976  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7977						  (__v2di) __B,
7978						  (__v2di)
7979						  _mm_setzero_di (),
7980						  (__mmask8) -1);
7981}
7982
7983extern __inline __m128i
7984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7985_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
7986		    __m128i __B)
7987{
7988  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7989						  (__v2di) __B,
7990						  (__v2di) __W, __M);
7991}
7992
7993extern __inline __m128i
7994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7995_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
7996{
7997  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7998						  (__v2di) __B,
7999						  (__v2di)
8000						  _mm_setzero_si128 (),
8001						  __M);
8002}
8003
8004extern __inline __m128i
8005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8006_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8007{
8008  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8009						  (__v2di) __B,
8010						  (__v2di)
8011						  _mm_setzero_si128 (),
8012						  __M);
8013}
8014
8015extern __inline __m128i
8016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8017_mm_max_epi64 (__m128i __A, __m128i __B)
8018{
8019  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8020						  (__v2di) __B,
8021						  (__v2di)
8022						  _mm_setzero_di (),
8023						  (__mmask8) -1);
8024}
8025
8026extern __inline __m128i
8027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8028_mm_max_epu64 (__m128i __A, __m128i __B)
8029{
8030  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8031						  (__v2di) __B,
8032						  (__v2di)
8033						  _mm_setzero_di (),
8034						  (__mmask8) -1);
8035}
8036
8037extern __inline __m128i
8038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8039_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8040		    __m128i __B)
8041{
8042  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8043						  (__v2di) __B,
8044						  (__v2di) __W, __M);
8045}
8046
8047extern __inline __m128i
8048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8049_mm_min_epu64 (__m128i __A, __m128i __B)
8050{
8051  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8052						  (__v2di) __B,
8053						  (__v2di)
8054						  _mm_setzero_di (),
8055						  (__mmask8) -1);
8056}
8057
8058extern __inline __m128i
8059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8060_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8061		    __m128i __B)
8062{
8063  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8064						  (__v2di) __B,
8065						  (__v2di) __W, __M);
8066}
8067
8068extern __inline __m128i
8069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8070_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8071{
8072  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8073						  (__v2di) __B,
8074						  (__v2di)
8075						  _mm_setzero_si128 (),
8076						  __M);
8077}
8078
8079extern __inline __m128i
8080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8081_mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8082{
8083  return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8084						  (__v4si) __B,
8085						  (__v4si)
8086						  _mm_setzero_si128 (),
8087						  __M);
8088}
8089
8090extern __inline __m128i
8091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8092_mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8093		    __m128i __B)
8094{
8095  return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8096						  (__v4si) __B,
8097						  (__v4si) __W, __M);
8098}
8099
8100extern __inline __m128i
8101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8102_mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8103{
8104  return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8105						  (__v4si) __B,
8106						  (__v4si)
8107						  _mm_setzero_si128 (),
8108						  __M);
8109}
8110
8111extern __inline __m128i
8112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8113_mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8114		    __m128i __B)
8115{
8116  return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8117						  (__v4si) __B,
8118						  (__v4si) __W, __M);
8119}
8120
8121extern __inline __m128i
8122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8123_mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8124{
8125  return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8126						  (__v4si) __B,
8127						  (__v4si)
8128						  _mm_setzero_si128 (),
8129						  __M);
8130}
8131
8132extern __inline __m128i
8133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8134_mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8135		    __m128i __B)
8136{
8137  return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8138						  (__v4si) __B,
8139						  (__v4si) __W, __M);
8140}
8141
8142extern __inline __m128i
8143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8144_mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8145{
8146  return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8147						  (__v4si) __B,
8148						  (__v4si)
8149						  _mm_setzero_si128 (),
8150						  __M);
8151}
8152
8153extern __inline __m128i
8154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8155_mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8156		    __m128i __B)
8157{
8158  return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8159						  (__v4si) __B,
8160						  (__v4si) __W, __M);
8161}
8162
/* When __AVX512CD__ is not already defined, save the current target options
   and switch the following definitions to "avx512vl,avx512cd".
   __DISABLE_AVX512VLCD__ records that a push happened so the matching
   pop_options is only issued in that case.  */
8163#ifndef __AVX512CD__
8164#pragma GCC push_options
8165#pragma GCC target("avx512vl,avx512cd")
8166#define __DISABLE_AVX512VLCD__
8167#endif /* __AVX512CD__ */
8168
8169extern __inline __m128i
8170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8171_mm_broadcastmb_epi64 (__mmask8 __A)
8172{
8173  return (__m128i) __builtin_ia32_broadcastmb128 (__A);
8174}
8175
8176extern __inline __m256i
8177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8178_mm256_broadcastmb_epi64 (__mmask8 __A)
8179{
8180  return (__m256i) __builtin_ia32_broadcastmb256 (__A);
8181}
8182
8183extern __inline __m128i
8184__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8185_mm_broadcastmw_epi32 (__mmask16 __A)
8186{
8187  return (__m128i) __builtin_ia32_broadcastmw128 (__A);
8188}
8189
8190extern __inline __m256i
8191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8192_mm256_broadcastmw_epi32 (__mmask16 __A)
8193{
8194  return (__m256i) __builtin_ia32_broadcastmw256 (__A);
8195}
8196
8197extern __inline __m256i
8198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8199_mm256_lzcnt_epi32 (__m256i __A)
8200{
8201  return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8202						     (__v8si)
8203						     _mm256_setzero_si256 (),
8204						     (__mmask8) -1);
8205}
8206
8207extern __inline __m256i
8208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8209_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8210{
8211  return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8212						     (__v8si) __W,
8213						     (__mmask8) __U);
8214}
8215
8216extern __inline __m256i
8217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8218_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
8219{
8220  return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8221						     (__v8si)
8222						     _mm256_setzero_si256 (),
8223						     (__mmask8) __U);
8224}
8225
8226extern __inline __m256i
8227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8228_mm256_lzcnt_epi64 (__m256i __A)
8229{
8230  return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8231						     (__v4di)
8232						     _mm256_setzero_si256 (),
8233						     (__mmask8) -1);
8234}
8235
8236extern __inline __m256i
8237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8238_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8239{
8240  return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8241						     (__v4di) __W,
8242						     (__mmask8) __U);
8243}
8244
8245extern __inline __m256i
8246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8247_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
8248{
8249  return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8250						     (__v4di)
8251						     _mm256_setzero_si256 (),
8252						     (__mmask8) __U);
8253}
8254
8255extern __inline __m256i
8256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8257_mm256_conflict_epi64 (__m256i __A)
8258{
8259  return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8260							 (__v4di)
8261							 _mm256_setzero_si256 (),
8262							 (__mmask8) -
8263							 1);
8264}
8265
8266extern __inline __m256i
8267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8268_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8269{
8270  return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8271							 (__v4di) __W,
8272							 (__mmask8)
8273							 __U);
8274}
8275
8276extern __inline __m256i
8277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8278_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
8279{
8280  return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8281							 (__v4di)
8282							 _mm256_setzero_si256 (),
8283							 (__mmask8)
8284							 __U);
8285}
8286
8287extern __inline __m256i
8288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8289_mm256_conflict_epi32 (__m256i __A)
8290{
8291  return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8292							 (__v8si)
8293							 _mm256_setzero_si256 (),
8294							 (__mmask8) -
8295							 1);
8296}
8297
8298extern __inline __m256i
8299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8300_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8301{
8302  return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8303							 (__v8si) __W,
8304							 (__mmask8)
8305							 __U);
8306}
8307
8308extern __inline __m256i
8309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8310_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
8311{
8312  return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8313							 (__v8si)
8314							 _mm256_setzero_si256 (),
8315							 (__mmask8)
8316							 __U);
8317}
8318
8319extern __inline __m128i
8320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8321_mm_lzcnt_epi32 (__m128i __A)
8322{
8323  return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8324						     (__v4si)
8325						     _mm_setzero_si128 (),
8326						     (__mmask8) -1);
8327}
8328
8329extern __inline __m128i
8330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8331_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8332{
8333  return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8334						     (__v4si) __W,
8335						     (__mmask8) __U);
8336}
8337
8338extern __inline __m128i
8339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8340_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
8341{
8342  return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8343						     (__v4si)
8344						     _mm_setzero_si128 (),
8345						     (__mmask8) __U);
8346}
8347
8348extern __inline __m128i
8349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8350_mm_lzcnt_epi64 (__m128i __A)
8351{
8352  return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8353						     (__v2di)
8354						     _mm_setzero_di (),
8355						     (__mmask8) -1);
8356}
8357
8358extern __inline __m128i
8359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8360_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8361{
8362  return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8363						     (__v2di) __W,
8364						     (__mmask8) __U);
8365}
8366
8367extern __inline __m128i
8368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8369_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
8370{
8371  return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8372						     (__v2di)
8373						     _mm_setzero_di (),
8374						     (__mmask8) __U);
8375}
8376
8377extern __inline __m128i
8378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8379_mm_conflict_epi64 (__m128i __A)
8380{
8381  return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8382							 (__v2di)
8383							 _mm_setzero_di (),
8384							 (__mmask8) -
8385							 1);
8386}
8387
8388extern __inline __m128i
8389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8390_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8391{
8392  return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8393							 (__v2di) __W,
8394							 (__mmask8)
8395							 __U);
8396}
8397
8398extern __inline __m128i
8399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8400_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
8401{
8402  return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8403							 (__v2di)
8404							 _mm_setzero_di (),
8405							 (__mmask8)
8406							 __U);
8407}
8408
8409extern __inline __m128i
8410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8411_mm_conflict_epi32 (__m128i __A)
8412{
8413  return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8414							 (__v4si)
8415							 _mm_setzero_si128 (),
8416							 (__mmask8) -
8417							 1);
8418}
8419
8420extern __inline __m128i
8421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8422_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8423{
8424  return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8425							 (__v4si) __W,
8426							 (__mmask8)
8427							 __U);
8428}
8429
8430extern __inline __m128i
8431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8432_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
8433{
8434  return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8435							 (__v4si)
8436							 _mm_setzero_si128 (),
8437							 (__mmask8)
8438							 __U);
8439}
8440
/* Close the "avx512vl,avx512cd" region: if the options were pushed for it
   (marked by __DISABLE_AVX512VLCD__), restore them.  The marker is undefined
   at the same time so this internal bookkeeping macro does not remain
   defined for code that follows the region.  */
#ifdef __DISABLE_AVX512VLCD__
#undef __DISABLE_AVX512VLCD__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VLCD__ */
8444
8445extern __inline __m256d
8446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8447_mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
8448			 __m256d __B)
8449{
8450  return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8451						    (__v4df) __B,
8452						    (__v4df) __W,
8453						    (__mmask8) __U);
8454}
8455
8456extern __inline __m256d
8457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8458_mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
8459{
8460  return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8461						    (__v4df) __B,
8462						    (__v4df)
8463						    _mm256_setzero_pd (),
8464						    (__mmask8) __U);
8465}
8466
8467extern __inline __m128d
8468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8469_mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
8470		      __m128d __B)
8471{
8472  return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8473						    (__v2df) __B,
8474						    (__v2df) __W,
8475						    (__mmask8) __U);
8476}
8477
8478extern __inline __m128d
8479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8480_mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
8481{
8482  return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8483						    (__v2df) __B,
8484						    (__v2df)
8485						    _mm_setzero_pd (),
8486						    (__mmask8) __U);
8487}
8488
8489extern __inline __m256
8490__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8491_mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
8492			 __m256 __B)
8493{
8494  return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8495						   (__v8sf) __B,
8496						   (__v8sf) __W,
8497						   (__mmask8) __U);
8498}
8499
8500extern __inline __m256d
8501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8502_mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
8503			 __m256d __B)
8504{
8505  return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8506						    (__v4df) __B,
8507						    (__v4df) __W,
8508						    (__mmask8) __U);
8509}
8510
8511extern __inline __m256d
8512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8513_mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
8514{
8515  return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8516						    (__v4df) __B,
8517						    (__v4df)
8518						    _mm256_setzero_pd (),
8519						    (__mmask8) __U);
8520}
8521
8522extern __inline __m128d
8523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8524_mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
8525		      __m128d __B)
8526{
8527  return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8528						    (__v2df) __B,
8529						    (__v2df) __W,
8530						    (__mmask8) __U);
8531}
8532
8533extern __inline __m128d
8534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8535_mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
8536{
8537  return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8538						    (__v2df) __B,
8539						    (__v2df)
8540						    _mm_setzero_pd (),
8541						    (__mmask8) __U);
8542}
8543
8544extern __inline __m256
8545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8546_mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
8547			 __m256 __B)
8548{
8549  return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8550						   (__v8sf) __B,
8551						   (__v8sf) __W,
8552						   (__mmask8) __U);
8553}
8554
8555extern __inline __m256
8556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8557_mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
8558{
8559  return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8560						   (__v8sf) __B,
8561						   (__v8sf)
8562						   _mm256_setzero_ps (),
8563						   (__mmask8) __U);
8564}
8565
8566extern __inline __m128
8567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8568_mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8569{
8570  return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8571						   (__v4sf) __B,
8572						   (__v4sf) __W,
8573						   (__mmask8) __U);
8574}
8575
8576extern __inline __m128
8577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8578_mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
8579{
8580  return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8581						   (__v4sf) __B,
8582						   (__v4sf)
8583						   _mm_setzero_ps (),
8584						   (__mmask8) __U);
8585}
8586
8587extern __inline __m128
8588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8589_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8590{
8591  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8592						 (__v4sf) __W,
8593						 (__mmask8) __U);
8594}
8595
8596extern __inline __m128
8597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8598_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8599{
8600  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8601						 (__v4sf)
8602						 _mm_setzero_ps (),
8603						 (__mmask8) __U);
8604}
8605
8606extern __inline __m256
8607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8608_mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
8609{
8610  return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8611						   (__v8sf) __B,
8612						   (__v8sf)
8613						   _mm256_setzero_ps (),
8614						   (__mmask8) __U);
8615}
8616
8617extern __inline __m256
8618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8619_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8620{
8621  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8622						    (__v8sf) __W,
8623						    (__mmask8) __U);
8624}
8625
8626extern __inline __m256
8627__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8628_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8629{
8630  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8631						    (__v8sf)
8632						    _mm256_setzero_ps (),
8633						    (__mmask8) __U);
8634}
8635
8636extern __inline __m128
8637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8638_mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8639{
8640  return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8641						   (__v4sf) __B,
8642						   (__v4sf) __W,
8643						   (__mmask8) __U);
8644}
8645
8646extern __inline __m128
8647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8648_mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
8649{
8650  return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8651						   (__v4sf) __B,
8652						   (__v4sf)
8653						   _mm_setzero_ps (),
8654						   (__mmask8) __U);
8655}
8656
8657extern __inline __m256i
8658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8659_mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8660		       __m128i __B)
8661{
8662  return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8663						 (__v4si) __B,
8664						 (__v8si) __W,
8665						 (__mmask8) __U);
8666}
8667
8668extern __inline __m256i
8669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8670_mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8671{
8672  return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8673						 (__v4si) __B,
8674						 (__v8si)
8675						 _mm256_setzero_si256 (),
8676						 (__mmask8) __U);
8677}
8678
8679extern __inline __m128i
8680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8681_mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8682		    __m128i __B)
8683{
8684  return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8685						 (__v4si) __B,
8686						 (__v4si) __W,
8687						 (__mmask8) __U);
8688}
8689
8690extern __inline __m128i
8691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8692_mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8693{
8694  return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8695						 (__v4si) __B,
8696						 (__v4si)
8697						 _mm_setzero_si128 (),
8698						 (__mmask8) __U);
8699}
8700
8701extern __inline __m256i
8702__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8703_mm256_sra_epi64 (__m256i __A, __m128i __B)
8704{
8705  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8706						 (__v2di) __B,
8707						 (__v4di)
8708						 _mm256_setzero_si256 (),
8709						 (__mmask8) -1);
8710}
8711
8712extern __inline __m256i
8713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8714_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8715		       __m128i __B)
8716{
8717  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8718						 (__v2di) __B,
8719						 (__v4di) __W,
8720						 (__mmask8) __U);
8721}
8722
8723extern __inline __m256i
8724__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8725_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8726{
8727  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8728						 (__v2di) __B,
8729						 (__v4di)
8730						 _mm256_setzero_si256 (),
8731						 (__mmask8) __U);
8732}
8733
8734extern __inline __m128i
8735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8736_mm_sra_epi64 (__m128i __A, __m128i __B)
8737{
8738  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8739						 (__v2di) __B,
8740						 (__v2di)
8741						 _mm_setzero_di (),
8742						 (__mmask8) -1);
8743}
8744
8745extern __inline __m128i
8746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8747_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8748		    __m128i __B)
8749{
8750  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8751						 (__v2di) __B,
8752						 (__v2di) __W,
8753						 (__mmask8) __U);
8754}
8755
8756extern __inline __m128i
8757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8758_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8759{
8760  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8761						 (__v2di) __B,
8762						 (__v2di)
8763						 _mm_setzero_di (),
8764						 (__mmask8) __U);
8765}
8766
8767extern __inline __m128i
8768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8769_mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8770		    __m128i __B)
8771{
8772  return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8773						 (__v4si) __B,
8774						 (__v4si) __W,
8775						 (__mmask8) __U);
8776}
8777
8778extern __inline __m128i
8779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8780_mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8781{
8782  return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8783						 (__v4si) __B,
8784						 (__v4si)
8785						 _mm_setzero_si128 (),
8786						 (__mmask8) __U);
8787}
8788
8789extern __inline __m128i
8790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8791_mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8792		    __m128i __B)
8793{
8794  return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8795						 (__v2di) __B,
8796						 (__v2di) __W,
8797						 (__mmask8) __U);
8798}
8799
8800extern __inline __m128i
8801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8802_mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8803{
8804  return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8805						 (__v2di) __B,
8806						 (__v2di)
8807						 _mm_setzero_di (),
8808						 (__mmask8) __U);
8809}
8810
8811extern __inline __m256i
8812__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8813_mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8814		       __m128i __B)
8815{
8816  return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8817						 (__v4si) __B,
8818						 (__v8si) __W,
8819						 (__mmask8) __U);
8820}
8821
8822extern __inline __m256i
8823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8824_mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8825{
8826  return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8827						 (__v4si) __B,
8828						 (__v8si)
8829						 _mm256_setzero_si256 (),
8830						 (__mmask8) __U);
8831}
8832
8833extern __inline __m256i
8834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8835_mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8836		       __m128i __B)
8837{
8838  return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8839						 (__v2di) __B,
8840						 (__v4di) __W,
8841						 (__mmask8) __U);
8842}
8843
8844extern __inline __m256i
8845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8846_mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8847{
8848  return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8849						 (__v2di) __B,
8850						 (__v4di)
8851						 _mm256_setzero_si256 (),
8852						 (__mmask8) __U);
8853}
8854
8855extern __inline __m256
8856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8857_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8858			    __m256 __Y)
8859{
8860  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8861						    (__v8si) __X,
8862						    (__v8sf) __W,
8863						    (__mmask8) __U);
8864}
8865
8866extern __inline __m256
8867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8868_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8869{
8870  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8871						    (__v8si) __X,
8872						    (__v8sf)
8873						    _mm256_setzero_ps (),
8874						    (__mmask8) __U);
8875}
8876
8877extern __inline __m256d
8878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8879_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8880{
8881  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8882						     (__v4di) __X,
8883						     (__v4df)
8884						     _mm256_setzero_pd (),
8885						     (__mmask8) -1);
8886}
8887
8888extern __inline __m256d
8889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8890_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8891			    __m256d __Y)
8892{
8893  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8894						     (__v4di) __X,
8895						     (__v4df) __W,
8896						     (__mmask8) __U);
8897}
8898
8899extern __inline __m256d
8900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8901_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8902{
8903  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8904						     (__v4di) __X,
8905						     (__v4df)
8906						     _mm256_setzero_pd (),
8907						     (__mmask8) __U);
8908}
8909
8910extern __inline __m256d
8911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8912_mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
8913			   __m256i __C)
8914{
8915  return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8916							(__v4di) __C,
8917							(__v4df) __W,
8918							(__mmask8)
8919							__U);
8920}
8921
8922extern __inline __m256d
8923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8924_mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
8925{
8926  return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8927							(__v4di) __C,
8928							(__v4df)
8929							_mm256_setzero_pd (),
8930							(__mmask8)
8931							__U);
8932}
8933
8934extern __inline __m256
8935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8936_mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
8937			   __m256i __C)
8938{
8939  return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8940						       (__v8si) __C,
8941						       (__v8sf) __W,
8942						       (__mmask8) __U);
8943}
8944
8945extern __inline __m256
8946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8947_mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
8948{
8949  return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8950						       (__v8si) __C,
8951						       (__v8sf)
8952						       _mm256_setzero_ps (),
8953						       (__mmask8) __U);
8954}
8955
8956extern __inline __m128d
8957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8958_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
8959			__m128i __C)
8960{
8961  return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
8962						     (__v2di) __C,
8963						     (__v2df) __W,
8964						     (__mmask8) __U);
8965}
8966
8967extern __inline __m128d
8968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8969_mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
8970{
8971  return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
8972						     (__v2di) __C,
8973						     (__v2df)
8974						     _mm_setzero_pd (),
8975						     (__mmask8) __U);
8976}
8977
8978extern __inline __m128
8979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8980_mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
8981			__m128i __C)
8982{
8983  return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
8984						    (__v4si) __C,
8985						    (__v4sf) __W,
8986						    (__mmask8) __U);
8987}
8988
8989extern __inline __m128
8990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8991_mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
8992{
8993  return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
8994						    (__v4si) __C,
8995						    (__v4sf)
8996						    _mm_setzero_ps (),
8997						    (__mmask8) __U);
8998}
8999
9000extern __inline __m256i
9001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9002_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
9003{
9004  return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9005						  (__v8si) __B,
9006						  (__v8si)
9007						  _mm256_setzero_si256 (),
9008						  __M);
9009}
9010
9011extern __inline __m256i
9012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9013_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
9014{
9015  return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9016						     (__v4di) __X,
9017						     (__v4di)
9018						     _mm256_setzero_si256 (),
9019						     __M);
9020}
9021
9022extern __inline __m256i
9023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9024_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
9025			 __m256i __B)
9026{
9027  return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9028						  (__v8si) __B,
9029						  (__v8si) __W, __M);
9030}
9031
9032extern __inline __m128i
9033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9034_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
9035{
9036  return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9037						  (__v4si) __B,
9038						  (__v4si)
9039						  _mm_setzero_si128 (),
9040						  __M);
9041}
9042
9043extern __inline __m128i
9044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9045_mm_mask_mullo_epi32 (__m128i __W, __mmask16 __M, __m128i __A,
9046		      __m128i __B)
9047{
9048  return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9049						  (__v4si) __B,
9050						  (__v4si) __W, __M);
9051}
9052
9053extern __inline __m256i
9054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9055_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9056		       __m256i __Y)
9057{
9058  return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9059						  (__v8si) __Y,
9060						  (__v4di) __W, __M);
9061}
9062
9063extern __inline __m256i
9064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9065_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9066{
9067  return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9068						  (__v8si) __Y,
9069						  (__v4di)
9070						  _mm256_setzero_si256 (),
9071						  __M);
9072}
9073
9074extern __inline __m128i
9075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9076_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
9077		    __m128i __Y)
9078{
9079  return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9080						  (__v4si) __Y,
9081						  (__v2di) __W, __M);
9082}
9083
9084extern __inline __m128i
9085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9086_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
9087{
9088  return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9089						  (__v4si) __Y,
9090						  (__v2di)
9091						  _mm_setzero_si128 (),
9092						  __M);
9093}
9094
9095extern __inline __m256i
9096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9097_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
9098			       __m256i __Y)
9099{
9100  return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9101						     (__v4di) __X,
9102						     (__v4di) __W,
9103						     __M);
9104}
9105
9106extern __inline __m256i
9107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9108_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
9109		       __m256i __Y)
9110{
9111  return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9112						   (__v8si) __Y,
9113						   (__v4di) __W, __M);
9114}
9115
9116extern __inline __m256i
9117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9118_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9119{
9120  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9121						     (__v8si) __X,
9122						     (__v8si)
9123						     _mm256_setzero_si256 (),
9124						     __M);
9125}
9126
9127extern __inline __m256i
9128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9129_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
9130{
9131  return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9132						   (__v8si) __Y,
9133						   (__v4di)
9134						   _mm256_setzero_si256 (),
9135						   __M);
9136}
9137
9138extern __inline __m128i
9139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9140_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
9141		    __m128i __Y)
9142{
9143  return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9144						   (__v4si) __Y,
9145						   (__v2di) __W, __M);
9146}
9147
9148extern __inline __m128i
9149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9150_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
9151{
9152  return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9153						   (__v4si) __Y,
9154						   (__v2di)
9155						   _mm_setzero_si128 (),
9156						   __M);
9157}
9158
9159extern __inline __m256i
9160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9161_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9162			       __m256i __Y)
9163{
9164  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9165						     (__v8si) __X,
9166						     (__v8si) __W,
9167						     __M);
9168}
9169
9170#ifdef __OPTIMIZE__
9171extern __inline __m256i
9172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9173_mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
9174			    __m256i __X, const int __I)
9175{
9176  return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9177						  __I,
9178						  (__v4di) __W,
9179						  (__mmask8) __M);
9180}
9181
9182extern __inline __m256i
9183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9184_mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
9185{
9186  return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9187						  __I,
9188						  (__v4di)
9189						  _mm256_setzero_si256 (),
9190						  (__mmask8) __M);
9191}
9192
9193extern __inline __m256d
9194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9195_mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
9196			__m256d __B, const int __imm)
9197{
9198  return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9199						  (__v4df) __B, __imm,
9200						  (__v4df) __W,
9201						  (__mmask8) __U);
9202}
9203
9204extern __inline __m256d
9205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9206_mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
9207			 const int __imm)
9208{
9209  return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9210						  (__v4df) __B, __imm,
9211						  (__v4df)
9212						  _mm256_setzero_pd (),
9213						  (__mmask8) __U);
9214}
9215
9216extern __inline __m128d
9217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9218_mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
9219		     __m128d __B, const int __imm)
9220{
9221  return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9222						  (__v2df) __B, __imm,
9223						  (__v2df) __W,
9224						  (__mmask8) __U);
9225}
9226
9227extern __inline __m128d
9228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9229_mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
9230		      const int __imm)
9231{
9232  return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9233						  (__v2df) __B, __imm,
9234						  (__v2df)
9235						  _mm_setzero_pd (),
9236						  (__mmask8) __U);
9237}
9238
9239extern __inline __m256
9240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9241_mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
9242			__m256 __B, const int __imm)
9243{
9244  return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9245						 (__v8sf) __B, __imm,
9246						 (__v8sf) __W,
9247						 (__mmask8) __U);
9248}
9249
9250extern __inline __m256
9251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9252_mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
9253			 const int __imm)
9254{
9255  return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9256						 (__v8sf) __B, __imm,
9257						 (__v8sf)
9258						 _mm256_setzero_ps (),
9259						 (__mmask8) __U);
9260}
9261
9262extern __inline __m128
9263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9264_mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
9265		     const int __imm)
9266{
9267  return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9268						 (__v4sf) __B, __imm,
9269						 (__v4sf) __W,
9270						 (__mmask8) __U);
9271}
9272
9273extern __inline __m128
9274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9275_mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
9276		      const int __imm)
9277{
9278  return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9279						 (__v4sf) __B, __imm,
9280						 (__v4sf)
9281						 _mm_setzero_ps (),
9282						 (__mmask8) __U);
9283}
9284
9285extern __inline __m256i
9286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9287_mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
9288{
9289  return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9290							(__v4si) __B,
9291							__imm,
9292							(__v8si)
9293							_mm256_setzero_si256 (),
9294							(__mmask8) -
9295							1);
9296}
9297
9298extern __inline __m256i
9299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9300_mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9301			 __m128i __B, const int __imm)
9302{
9303  return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9304							(__v4si) __B,
9305							__imm,
9306							(__v8si) __W,
9307							(__mmask8)
9308							__U);
9309}
9310
9311extern __inline __m256i
9312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9313_mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
9314			  const int __imm)
9315{
9316  return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9317							(__v4si) __B,
9318							__imm,
9319							(__v8si)
9320							_mm256_setzero_si256 (),
9321							(__mmask8)
9322							__U);
9323}
9324
9325extern __inline __m256
9326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9327_mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
9328{
9329  return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9330						       (__v4sf) __B,
9331						       __imm,
9332						       (__v8sf)
9333						       _mm256_setzero_ps (),
9334						       (__mmask8) -1);
9335}
9336
9337extern __inline __m256
9338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9339_mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
9340			 __m128 __B, const int __imm)
9341{
9342  return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9343						       (__v4sf) __B,
9344						       __imm,
9345						       (__v8sf) __W,
9346						       (__mmask8) __U);
9347}
9348
9349extern __inline __m256
9350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9351_mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
9352			  const int __imm)
9353{
9354  return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9355						       (__v4sf) __B,
9356						       __imm,
9357						       (__v8sf)
9358						       _mm256_setzero_ps (),
9359						       (__mmask8) __U);
9360}
9361
9362extern __inline __m128i
9363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9364_mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
9365{
9366  return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9367							 __imm,
9368							 (__v4si)
9369							 _mm_setzero_si128 (),
9370							 (__mmask8) -
9371							 1);
9372}
9373
9374extern __inline __m128i
9375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9376_mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
9377				const int __imm)
9378{
9379  return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9380							 __imm,
9381							 (__v4si) __W,
9382							 (__mmask8)
9383							 __U);
9384}
9385
9386extern __inline __m128i
9387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9388_mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
9389				 const int __imm)
9390{
9391  return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9392							 __imm,
9393							 (__v4si)
9394							 _mm_setzero_si128 (),
9395							 (__mmask8)
9396							 __U);
9397}
9398
9399extern __inline __m128
9400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9401_mm256_extractf32x4_ps (__m256 __A, const int __imm)
9402{
9403  return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9404							__imm,
9405							(__v4sf)
9406							_mm_setzero_ps (),
9407							(__mmask8) -
9408							1);
9409}
9410
9411extern __inline __m128
9412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9413_mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
9414			     const int __imm)
9415{
9416  return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9417							__imm,
9418							(__v4sf) __W,
9419							(__mmask8)
9420							__U);
9421}
9422
9423extern __inline __m128
9424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9425_mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
9426			      const int __imm)
9427{
9428  return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9429							__imm,
9430							(__v4sf)
9431							_mm_setzero_ps (),
9432							(__mmask8)
9433							__U);
9434}
9435
9436extern __inline __m256i
9437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9438_mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
9439{
9440  return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9441						       (__v4di) __B,
9442						       __imm,
9443						       (__v4di)
9444						       _mm256_setzero_si256 (),
9445						       (__mmask8) -1);
9446}
9447
9448extern __inline __m256i
9449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9450_mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
9451			   __m256i __B, const int __imm)
9452{
9453  return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9454						       (__v4di) __B,
9455						       __imm,
9456						       (__v4di) __W,
9457						       (__mmask8) __U);
9458}
9459
9460extern __inline __m256i
9461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9462_mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
9463			    const int __imm)
9464{
9465  return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9466						       (__v4di) __B,
9467						       __imm,
9468						       (__v4di)
9469						       _mm256_setzero_si256 (),
9470						       (__mmask8) __U);
9471}
9472
9473extern __inline __m256i
9474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9475_mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
9476{
9477  return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9478						       (__v8si) __B,
9479						       __imm,
9480						       (__v8si)
9481						       _mm256_setzero_si256 (),
9482						       (__mmask8) -1);
9483}
9484
9485extern __inline __m256i
9486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9487_mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9488			   __m256i __B, const int __imm)
9489{
9490  return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9491						       (__v8si) __B,
9492						       __imm,
9493						       (__v8si) __W,
9494						       (__mmask8) __U);
9495}
9496
9497extern __inline __m256i
9498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9499_mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
9500			    const int __imm)
9501{
9502  return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9503						       (__v8si) __B,
9504						       __imm,
9505						       (__v8si)
9506						       _mm256_setzero_si256 (),
9507						       (__mmask8) __U);
9508}
9509
9510extern __inline __m256d
9511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9512_mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
9513{
9514  return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9515						       (__v4df) __B,
9516						       __imm,
9517						       (__v4df)
9518						       _mm256_setzero_pd (),
9519						       (__mmask8) -1);
9520}
9521
9522extern __inline __m256d
9523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9524_mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
9525			   __m256d __B, const int __imm)
9526{
9527  return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9528						       (__v4df) __B,
9529						       __imm,
9530						       (__v4df) __W,
9531						       (__mmask8) __U);
9532}
9533
9534extern __inline __m256d
9535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9536_mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
9537			    const int __imm)
9538{
9539  return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9540						       (__v4df) __B,
9541						       __imm,
9542						       (__v4df)
9543						       _mm256_setzero_pd (),
9544						       (__mmask8) __U);
9545}
9546
9547extern __inline __m256
9548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9549_mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
9550{
9551  return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9552						      (__v8sf) __B,
9553						      __imm,
9554						      (__v8sf)
9555						      _mm256_setzero_ps (),
9556						      (__mmask8) -1);
9557}
9558
9559extern __inline __m256
9560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9561_mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
9562			   __m256 __B, const int __imm)
9563{
9564  return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9565						      (__v8sf) __B,
9566						      __imm,
9567						      (__v8sf) __W,
9568						      (__mmask8) __U);
9569}
9570
9571extern __inline __m256
9572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9573_mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
9574			    const int __imm)
9575{
9576  return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9577						      (__v8sf) __B,
9578						      __imm,
9579						      (__v8sf)
9580						      _mm256_setzero_ps (),
9581						      (__mmask8) __U);
9582}
9583
9584extern __inline __m256d
9585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9586_mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
9587		    const int __imm)
9588{
9589  return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
9590						      (__v4df) __B,
9591						      (__v4di) __C,
9592						      __imm,
9593						      (__mmask8) -1);
9594}
9595
9596extern __inline __m256d
9597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9598_mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
9599			 __m256i __C, const int __imm)
9600{
9601  return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
9602						      (__v4df) __B,
9603						      (__v4di) __C,
9604						      __imm,
9605						      (__mmask8) __U);
9606}
9607
9608extern __inline __m256d
9609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9610_mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
9611			  __m256i __C, const int __imm)
9612{
9613  return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
9614						       (__v4df) __B,
9615						       (__v4di) __C,
9616						       __imm,
9617						       (__mmask8) __U);
9618}
9619
9620extern __inline __m256
9621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9622_mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
9623		    const int __imm)
9624{
9625  return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
9626						     (__v8sf) __B,
9627						     (__v8si) __C,
9628						     __imm,
9629						     (__mmask8) -1);
9630}
9631
9632extern __inline __m256
9633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9634_mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
9635			 __m256i __C, const int __imm)
9636{
9637  return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
9638						     (__v8sf) __B,
9639						     (__v8si) __C,
9640						     __imm,
9641						     (__mmask8) __U);
9642}
9643
9644extern __inline __m256
9645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9646_mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
9647			  __m256i __C, const int __imm)
9648{
9649  return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
9650						      (__v8sf) __B,
9651						      (__v8si) __C,
9652						      __imm,
9653						      (__mmask8) __U);
9654}
9655
9656extern __inline __m128d
9657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9658_mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
9659		 const int __imm)
9660{
9661  return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
9662						      (__v2df) __B,
9663						      (__v2di) __C,
9664						      __imm,
9665						      (__mmask8) -1);
9666}
9667
9668extern __inline __m128d
9669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9670_mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
9671		      __m128i __C, const int __imm)
9672{
9673  return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
9674						      (__v2df) __B,
9675						      (__v2di) __C,
9676						      __imm,
9677						      (__mmask8) __U);
9678}
9679
9680extern __inline __m128d
9681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9682_mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
9683		       __m128i __C, const int __imm)
9684{
9685  return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
9686						       (__v2df) __B,
9687						       (__v2di) __C,
9688						       __imm,
9689						       (__mmask8) __U);
9690}
9691
9692extern __inline __m128
9693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9694_mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
9695{
9696  return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
9697						     (__v4sf) __B,
9698						     (__v4si) __C,
9699						     __imm,
9700						     (__mmask8) -1);
9701}
9702
9703extern __inline __m128
9704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9705_mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
9706		      __m128i __C, const int __imm)
9707{
9708  return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
9709						     (__v4sf) __B,
9710						     (__v4si) __C,
9711						     __imm,
9712						     (__mmask8) __U);
9713}
9714
9715extern __inline __m128
9716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9717_mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
9718		       __m128i __C, const int __imm)
9719{
9720  return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
9721						      (__v4sf) __B,
9722						      (__v4si) __C,
9723						      __imm,
9724						      (__mmask8) __U);
9725}
9726
9727extern __inline __m256i
9728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9729_mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
9730			const int __imm)
9731{
9732  return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
9733						  (__v8si) __W,
9734						  (__mmask8) __U);
9735}
9736
9737extern __inline __m256i
9738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9739_mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
9740{
9741  return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
9742						  (__v8si)
9743						  _mm256_setzero_si256 (),
9744						  (__mmask8) __U);
9745}
9746
9747extern __inline __m128i
9748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9749_mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
9750		     const int __imm)
9751{
9752  return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
9753						  (__v4si) __W,
9754						  (__mmask8) __U);
9755}
9756
9757extern __inline __m128i
9758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9759_mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
9760{
9761  return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
9762						  (__v4si)
9763						  _mm_setzero_si128 (),
9764						  (__mmask8) __U);
9765}
9766
9767extern __inline __m256i
9768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9769_mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
9770			const int __imm)
9771{
9772  return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
9773						  (__v4di) __W,
9774						  (__mmask8) __U);
9775}
9776
9777extern __inline __m256i
9778__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9779_mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
9780{
9781  return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
9782						  (__v4di)
9783						  _mm256_setzero_si256 (),
9784						  (__mmask8) __U);
9785}
9786
9787extern __inline __m128i
9788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9789_mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
9790		     const int __imm)
9791{
9792  return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
9793						  (__v2di) __W,
9794						  (__mmask8) __U);
9795}
9796
9797extern __inline __m128i
9798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9799_mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
9800{
9801  return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
9802						  (__v2di)
9803						  _mm_setzero_si128 (),
9804						  (__mmask8) __U);
9805}
9806
9807extern __inline __m256i
9808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9809_mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
9810			   const int imm)
9811{
9812  return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
9813						     (__v4di) __B,
9814						     (__v4di) __C, imm,
9815						     (__mmask8) -1);
9816}
9817
9818extern __inline __m256i
9819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9820_mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
9821				__m256i __B, __m256i __C,
9822				const int imm)
9823{
9824  return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
9825						     (__v4di) __B,
9826						     (__v4di) __C, imm,
9827						     (__mmask8) __U);
9828}
9829
9830extern __inline __m256i
9831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9832_mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
9833				 __m256i __B, __m256i __C,
9834				 const int imm)
9835{
9836  return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
9837						      (__v4di) __B,
9838						      (__v4di) __C,
9839						      imm,
9840						      (__mmask8) __U);
9841}
9842
9843extern __inline __m256i
9844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9845_mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
9846			   const int imm)
9847{
9848  return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
9849						     (__v8si) __B,
9850						     (__v8si) __C, imm,
9851						     (__mmask8) -1);
9852}
9853
9854extern __inline __m256i
9855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9856_mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
9857				__m256i __B, __m256i __C,
9858				const int imm)
9859{
9860  return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
9861						     (__v8si) __B,
9862						     (__v8si) __C, imm,
9863						     (__mmask8) __U);
9864}
9865
9866extern __inline __m256i
9867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9868_mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
9869				 __m256i __B, __m256i __C,
9870				 const int imm)
9871{
9872  return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
9873						      (__v8si) __B,
9874						      (__v8si) __C,
9875						      imm,
9876						      (__mmask8) __U);
9877}
9878
9879extern __inline __m128i
9880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9881_mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
9882			const int imm)
9883{
9884  return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
9885						     (__v2di) __B,
9886						     (__v2di) __C, imm,
9887						     (__mmask8) -1);
9888}
9889
9890extern __inline __m128i
9891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9892_mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
9893			     __m128i __B, __m128i __C, const int imm)
9894{
9895  return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
9896						     (__v2di) __B,
9897						     (__v2di) __C, imm,
9898						     (__mmask8) __U);
9899}
9900
9901extern __inline __m128i
9902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9903_mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
9904			      __m128i __B, __m128i __C, const int imm)
9905{
9906  return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
9907						      (__v2di) __B,
9908						      (__v2di) __C,
9909						      imm,
9910						      (__mmask8) __U);
9911}
9912
9913extern __inline __m128i
9914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9915_mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
9916			const int imm)
9917{
9918  return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
9919						     (__v4si) __B,
9920						     (__v4si) __C, imm,
9921						     (__mmask8) -1);
9922}
9923
9924extern __inline __m128i
9925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9926_mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
9927			     __m128i __B, __m128i __C, const int imm)
9928{
9929  return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
9930						     (__v4si) __B,
9931						     (__v4si) __C, imm,
9932						     (__mmask8) __U);
9933}
9934
9935extern __inline __m128i
9936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9937_mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
9938			      __m128i __B, __m128i __C, const int imm)
9939{
9940  return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
9941						      (__v4si) __B,
9942						      (__v4si) __C,
9943						      imm,
9944						      (__mmask8) __U);
9945}
9946
9947extern __inline __m256
9948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9949_mm256_roundscale_ps (__m256 __A, const int __imm)
9950{
9951  return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9952						      __imm,
9953						      (__v8sf)
9954						      _mm256_setzero_ps (),
9955						      (__mmask8) -1);
9956}
9957
9958extern __inline __m256
9959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9960_mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
9961			   const int __imm)
9962{
9963  return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9964						      __imm,
9965						      (__v8sf) __W,
9966						      (__mmask8) __U);
9967}
9968
9969extern __inline __m256
9970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9971_mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
9972{
9973  return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9974						      __imm,
9975						      (__v8sf)
9976						      _mm256_setzero_ps (),
9977						      (__mmask8) __U);
9978}
9979
9980extern __inline __m256d
9981__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9982_mm256_roundscale_pd (__m256d __A, const int __imm)
9983{
9984  return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
9985						       __imm,
9986						       (__v4df)
9987						       _mm256_setzero_pd (),
9988						       (__mmask8) -1);
9989}
9990
9991extern __inline __m256d
9992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9993_mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
9994			   const int __imm)
9995{
9996  return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
9997						       __imm,
9998						       (__v4df) __W,
9999						       (__mmask8) __U);
10000}
10001
10002extern __inline __m256d
10003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10004_mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
10005{
10006  return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10007						       __imm,
10008						       (__v4df)
10009						       _mm256_setzero_pd (),
10010						       (__mmask8) __U);
10011}
10012
10013extern __inline __m128
10014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10015_mm_roundscale_ps (__m128 __A, const int __imm)
10016{
10017  return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10018						      __imm,
10019						      (__v4sf)
10020						      _mm_setzero_ps (),
10021						      (__mmask8) -1);
10022}
10023
10024extern __inline __m128
10025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10026_mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
10027			const int __imm)
10028{
10029  return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10030						      __imm,
10031						      (__v4sf) __W,
10032						      (__mmask8) __U);
10033}
10034
10035extern __inline __m128
10036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10037_mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
10038{
10039  return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10040						      __imm,
10041						      (__v4sf)
10042						      _mm_setzero_ps (),
10043						      (__mmask8) __U);
10044}
10045
10046extern __inline __m128d
10047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10048_mm_roundscale_pd (__m128d __A, const int __imm)
10049{
10050  return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10051						       __imm,
10052						       (__v2df)
10053						       _mm_setzero_pd (),
10054						       (__mmask8) -1);
10055}
10056
10057extern __inline __m128d
10058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10059_mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
10060			const int __imm)
10061{
10062  return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10063						       __imm,
10064						       (__v2df) __W,
10065						       (__mmask8) __U);
10066}
10067
10068extern __inline __m128d
10069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10070_mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
10071{
10072  return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10073						       __imm,
10074						       (__v2df)
10075						       _mm_setzero_pd (),
10076						       (__mmask8) __U);
10077}
10078
10079extern __inline __m256
10080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10081_mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
10082		   _MM_MANTISSA_SIGN_ENUM __C)
10083{
10084  return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10085						    (__C << 2) | __B,
10086						    (__v8sf)
10087						    _mm256_setzero_ps (),
10088						    (__mmask8) -1);
10089}
10090
10091extern __inline __m256
10092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10093_mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
10094			_MM_MANTISSA_NORM_ENUM __B,
10095			_MM_MANTISSA_SIGN_ENUM __C)
10096{
10097  return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10098						    (__C << 2) | __B,
10099						    (__v8sf) __W,
10100						    (__mmask8) __U);
10101}
10102
10103extern __inline __m256
10104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10105_mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
10106			 _MM_MANTISSA_NORM_ENUM __B,
10107			 _MM_MANTISSA_SIGN_ENUM __C)
10108{
10109  return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10110						    (__C << 2) | __B,
10111						    (__v8sf)
10112						    _mm256_setzero_ps (),
10113						    (__mmask8) __U);
10114}
10115
10116extern __inline __m128
10117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10118_mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
10119		_MM_MANTISSA_SIGN_ENUM __C)
10120{
10121  return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10122						    (__C << 2) | __B,
10123						    (__v4sf)
10124						    _mm_setzero_ps (),
10125						    (__mmask8) -1);
10126}
10127
10128extern __inline __m128
10129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10130_mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
10131		     _MM_MANTISSA_NORM_ENUM __B,
10132		     _MM_MANTISSA_SIGN_ENUM __C)
10133{
10134  return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10135						    (__C << 2) | __B,
10136						    (__v4sf) __W,
10137						    (__mmask8) __U);
10138}
10139
10140extern __inline __m128
10141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10142_mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
10143		      _MM_MANTISSA_NORM_ENUM __B,
10144		      _MM_MANTISSA_SIGN_ENUM __C)
10145{
10146  return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10147						    (__C << 2) | __B,
10148						    (__v4sf)
10149						    _mm_setzero_ps (),
10150						    (__mmask8) __U);
10151}
10152
10153extern __inline __m256d
10154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10155_mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
10156		   _MM_MANTISSA_SIGN_ENUM __C)
10157{
10158  return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10159						     (__C << 2) | __B,
10160						     (__v4df)
10161						     _mm256_setzero_pd (),
10162						     (__mmask8) -1);
10163}
10164
10165extern __inline __m256d
10166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10167_mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
10168			_MM_MANTISSA_NORM_ENUM __B,
10169			_MM_MANTISSA_SIGN_ENUM __C)
10170{
10171  return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10172						     (__C << 2) | __B,
10173						     (__v4df) __W,
10174						     (__mmask8) __U);
10175}
10176
10177extern __inline __m256d
10178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10179_mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
10180			 _MM_MANTISSA_NORM_ENUM __B,
10181			 _MM_MANTISSA_SIGN_ENUM __C)
10182{
10183  return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10184						     (__C << 2) | __B,
10185						     (__v4df)
10186						     _mm256_setzero_pd (),
10187						     (__mmask8) __U);
10188}
10189
10190extern __inline __m128d
10191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10192_mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
10193		_MM_MANTISSA_SIGN_ENUM __C)
10194{
10195  return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10196						     (__C << 2) | __B,
10197						     (__v2df)
10198						     _mm_setzero_pd (),
10199						     (__mmask8) -1);
10200}
10201
10202extern __inline __m128d
10203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10204_mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
10205		     _MM_MANTISSA_NORM_ENUM __B,
10206		     _MM_MANTISSA_SIGN_ENUM __C)
10207{
10208  return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10209						     (__C << 2) | __B,
10210						     (__v2df) __W,
10211						     (__mmask8) __U);
10212}
10213
10214extern __inline __m128d
10215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10216_mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
10217		      _MM_MANTISSA_NORM_ENUM __B,
10218		      _MM_MANTISSA_SIGN_ENUM __C)
10219{
10220  return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10221						     (__C << 2) | __B,
10222						     (__v2df)
10223						     _mm_setzero_pd (),
10224						     (__mmask8) __U);
10225}
10226
10227extern __inline __m256
10228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10229_mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
10230			   __m256i __index, float const *__addr,
10231			   int __scale)
10232{
10233  return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
10234						__addr,
10235						(__v8si) __index,
10236						__mask, __scale);
10237}
10238
10239extern __inline __m128
10240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10241_mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
10242			__m128i __index, float const *__addr,
10243			int __scale)
10244{
10245  return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
10246						__addr,
10247						(__v4si) __index,
10248						__mask, __scale);
10249}
10250
10251extern __inline __m256d
10252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10253_mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
10254			   __m128i __index, double const *__addr,
10255			   int __scale)
10256{
10257  return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
10258						 __addr,
10259						 (__v4si) __index,
10260						 __mask, __scale);
10261}
10262
10263extern __inline __m128d
10264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10265_mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
10266			__m128i __index, double const *__addr,
10267			int __scale)
10268{
10269  return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
10270						 __addr,
10271						 (__v4si) __index,
10272						 __mask, __scale);
10273}
10274
10275extern __inline __m128
10276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10277_mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10278			   __m256i __index, float const *__addr,
10279			   int __scale)
10280{
10281  return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
10282						__addr,
10283						(__v4di) __index,
10284						__mask, __scale);
10285}
10286
10287extern __inline __m128
10288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10289_mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10290			__m128i __index, float const *__addr,
10291			int __scale)
10292{
10293  return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
10294						__addr,
10295						(__v2di) __index,
10296						__mask, __scale);
10297}
10298
10299extern __inline __m256d
10300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10301_mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
10302			   __m256i __index, double const *__addr,
10303			   int __scale)
10304{
10305  return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
10306						 __addr,
10307						 (__v4di) __index,
10308						 __mask, __scale);
10309}
10310
10311extern __inline __m128d
10312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10313_mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
10314			__m128i __index, double const *__addr,
10315			int __scale)
10316{
10317  return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
10318						 __addr,
10319						 (__v2di) __index,
10320						 __mask, __scale);
10321}
10322
10323extern __inline __m256i
10324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10325_mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
10326			      __m256i __index, int const *__addr,
10327			      int __scale)
10328{
10329  return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
10330						 __addr,
10331						 (__v8si) __index,
10332						 __mask, __scale);
10333}
10334
10335extern __inline __m128i
10336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10337_mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10338			   __m128i __index, int const *__addr,
10339			   int __scale)
10340{
10341  return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
10342						 __addr,
10343						 (__v4si) __index,
10344						 __mask, __scale);
10345}
10346
10347extern __inline __m256i
10348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10349_mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
10350			      __m128i __index, long long const *__addr,
10351			      int __scale)
10352{
10353  return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
10354						 __addr,
10355						 (__v4si) __index,
10356						 __mask, __scale);
10357}
10358
10359extern __inline __m128i
10360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10361_mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
10362			   __m128i __index, long long const *__addr,
10363			   int __scale)
10364{
10365  return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
10366						 __addr,
10367						 (__v4si) __index,
10368						 __mask, __scale);
10369}
10370
10371extern __inline __m128i
10372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10373_mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10374			      __m256i __index, int const *__addr,
10375			      int __scale)
10376{
10377  return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
10378						 __addr,
10379						 (__v4di) __index,
10380						 __mask, __scale);
10381}
10382
10383extern __inline __m128i
10384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10385_mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10386			   __m128i __index, int const *__addr,
10387			   int __scale)
10388{
10389  return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
10390						 __addr,
10391						 (__v2di) __index,
10392						 __mask, __scale);
10393}
10394
10395extern __inline __m256i
10396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10397_mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
10398			      __m256i __index, long long const *__addr,
10399			      int __scale)
10400{
10401  return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
10402						 __addr,
10403						 (__v4di) __index,
10404						 __mask, __scale);
10405}
10406
10407extern __inline __m128i
10408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10409_mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
10410			   __m128i __index, long long const *__addr,
10411			   int __scale)
10412{
10413  return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
10414						 __addr,
10415						 (__v2di) __index,
10416						 __mask, __scale);
10417}
10418
10419extern __inline void
10420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10421_mm256_i32scatter_ps (float *__addr, __m256i __index,
10422		      __m256 __v1, const int __scale)
10423{
10424  __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
10425				(__v8si) __index, (__v8sf) __v1,
10426				__scale);
10427}
10428
10429extern __inline void
10430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10431_mm256_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
10432			   __m256i __index, __m256 __v1,
10433			   const int __scale)
10434{
10435  __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
10436				(__v8sf) __v1, __scale);
10437}
10438
10439extern __inline void
10440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10441_mm_i32scatter_ps (float *__addr, __m128i __index, __m128 __v1,
10442		   const int __scale)
10443{
10444  __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
10445				(__v4si) __index, (__v4sf) __v1,
10446				__scale);
10447}
10448
10449extern __inline void
10450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10451_mm_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
10452			__m128i __index, __m128 __v1,
10453			const int __scale)
10454{
10455  __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
10456				(__v4sf) __v1, __scale);
10457}
10458
10459extern __inline void
10460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10461_mm256_i32scatter_pd (double *__addr, __m128i __index,
10462		      __m256d __v1, const int __scale)
10463{
10464  __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
10465				(__v4si) __index, (__v4df) __v1,
10466				__scale);
10467}
10468
10469extern __inline void
10470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10471_mm256_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
10472			   __m128i __index, __m256d __v1,
10473			   const int __scale)
10474{
10475  __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
10476				(__v4df) __v1, __scale);
10477}
10478
10479extern __inline void
10480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10481_mm_i32scatter_pd (double *__addr, __m128i __index,
10482		   __m128d __v1, const int __scale)
10483{
10484  __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
10485				(__v4si) __index, (__v2df) __v1,
10486				__scale);
10487}
10488
10489extern __inline void
10490__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10491_mm_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
10492			__m128i __index, __m128d __v1,
10493			const int __scale)
10494{
10495  __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
10496				(__v2df) __v1, __scale);
10497}
10498
10499extern __inline void
10500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10501_mm256_i64scatter_ps (float *__addr, __m256i __index,
10502		      __m128 __v1, const int __scale)
10503{
10504  __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
10505				(__v4di) __index, (__v4sf) __v1,
10506				__scale);
10507}
10508
10509extern __inline void
10510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10511_mm256_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
10512			   __m256i __index, __m128 __v1,
10513			   const int __scale)
10514{
10515  __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
10516				(__v4sf) __v1, __scale);
10517}
10518
10519extern __inline void
10520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10521_mm_i64scatter_ps (float *__addr, __m128i __index, __m128 __v1,
10522		   const int __scale)
10523{
10524  __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
10525				(__v2di) __index, (__v4sf) __v1,
10526				__scale);
10527}
10528
10529extern __inline void
10530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10531_mm_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
10532			__m128i __index, __m128 __v1,
10533			const int __scale)
10534{
10535  __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
10536				(__v4sf) __v1, __scale);
10537}
10538
10539extern __inline void
10540__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10541_mm256_i64scatter_pd (double *__addr, __m256i __index,
10542		      __m256d __v1, const int __scale)
10543{
10544  __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
10545				(__v4di) __index, (__v4df) __v1,
10546				__scale);
10547}
10548
10549extern __inline void
10550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10551_mm256_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
10552			   __m256i __index, __m256d __v1,
10553			   const int __scale)
10554{
10555  __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
10556				(__v4df) __v1, __scale);
10557}
10558
10559extern __inline void
10560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10561_mm_i64scatter_pd (double *__addr, __m128i __index,
10562		   __m128d __v1, const int __scale)
10563{
10564  __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
10565				(__v2di) __index, (__v2df) __v1,
10566				__scale);
10567}
10568
10569extern __inline void
10570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10571_mm_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
10572			__m128i __index, __m128d __v1,
10573			const int __scale)
10574{
10575  __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
10576				(__v2df) __v1, __scale);
10577}
10578
10579extern __inline void
10580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10581_mm256_i32scatter_epi32 (int *__addr, __m256i __index,
10582			 __m256i __v1, const int __scale)
10583{
10584  __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
10585				(__v8si) __index, (__v8si) __v1,
10586				__scale);
10587}
10588
10589extern __inline void
10590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10591_mm256_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
10592			      __m256i __index, __m256i __v1,
10593			      const int __scale)
10594{
10595  __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
10596				(__v8si) __v1, __scale);
10597}
10598
10599extern __inline void
10600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10601_mm_i32scatter_epi32 (int *__addr, __m128i __index,
10602		      __m128i __v1, const int __scale)
10603{
10604  __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
10605				(__v4si) __index, (__v4si) __v1,
10606				__scale);
10607}
10608
10609extern __inline void
10610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10611_mm_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
10612			   __m128i __index, __m128i __v1,
10613			   const int __scale)
10614{
10615  __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
10616				(__v4si) __v1, __scale);
10617}
10618
10619extern __inline void
10620__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10621_mm256_i32scatter_epi64 (long long *__addr, __m128i __index,
10622			 __m256i __v1, const int __scale)
10623{
10624  __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
10625				(__v4si) __index, (__v4di) __v1,
10626				__scale);
10627}
10628
10629extern __inline void
10630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10631_mm256_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
10632			      __m128i __index, __m256i __v1,
10633			      const int __scale)
10634{
10635  __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
10636				(__v4di) __v1, __scale);
10637}
10638
10639extern __inline void
10640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10641_mm_i32scatter_epi64 (long long *__addr, __m128i __index,
10642		      __m128i __v1, const int __scale)
10643{
10644  __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
10645				(__v4si) __index, (__v2di) __v1,
10646				__scale);
10647}
10648
10649extern __inline void
10650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10651_mm_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
10652			   __m128i __index, __m128i __v1,
10653			   const int __scale)
10654{
10655  __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
10656				(__v2di) __v1, __scale);
10657}
10658
10659extern __inline void
10660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10661_mm256_i64scatter_epi32 (int *__addr, __m256i __index,
10662			 __m128i __v1, const int __scale)
10663{
10664  __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
10665				(__v4di) __index, (__v4si) __v1,
10666				__scale);
10667}
10668
10669extern __inline void
10670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10671_mm256_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
10672			      __m256i __index, __m128i __v1,
10673			      const int __scale)
10674{
10675  __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
10676				(__v4si) __v1, __scale);
10677}
10678
10679extern __inline void
10680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10681_mm_i64scatter_epi32 (int *__addr, __m128i __index,
10682		      __m128i __v1, const int __scale)
10683{
10684  __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
10685				(__v2di) __index, (__v4si) __v1,
10686				__scale);
10687}
10688
10689extern __inline void
10690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10691_mm_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
10692			   __m128i __index, __m128i __v1,
10693			   const int __scale)
10694{
10695  __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
10696				(__v4si) __v1, __scale);
10697}
10698
10699extern __inline void
10700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10701_mm256_i64scatter_epi64 (long long *__addr, __m256i __index,
10702			 __m256i __v1, const int __scale)
10703{
10704  __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
10705				(__v4di) __index, (__v4di) __v1,
10706				__scale);
10707}
10708
10709extern __inline void
10710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10711_mm256_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
10712			      __m256i __index, __m256i __v1,
10713			      const int __scale)
10714{
10715  __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
10716				(__v4di) __v1, __scale);
10717}
10718
10719extern __inline void
10720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10721_mm_i64scatter_epi64 (long long *__addr, __m128i __index,
10722		      __m128i __v1, const int __scale)
10723{
10724  __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
10725				(__v2di) __index, (__v2di) __v1,
10726				__scale);
10727}
10728
10729extern __inline void
10730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10731_mm_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
10732			   __m128i __index, __m128i __v1,
10733			   const int __scale)
10734{
10735  __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
10736				(__v2di) __v1, __scale);
10737}
10738
10739extern __inline __m256i
10740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10741_mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10742			   _MM_PERM_ENUM __mask)
10743{
10744  return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
10745						  (__v8si) __W,
10746						  (__mmask8) __U);
10747}
10748
10749extern __inline __m256i
10750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10751_mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
10752			    _MM_PERM_ENUM __mask)
10753{
10754  return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
10755						  (__v8si)
10756						  _mm256_setzero_si256 (),
10757						  (__mmask8) __U);
10758}
10759
10760extern __inline __m128i
10761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10762_mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10763			_MM_PERM_ENUM __mask)
10764{
10765  return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
10766						  (__v4si) __W,
10767						  (__mmask8) __U);
10768}
10769
10770extern __inline __m128i
10771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10772_mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
10773			 _MM_PERM_ENUM __mask)
10774{
10775  return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
10776						  (__v4si)
10777						  _mm_setzero_si128 (),
10778						  (__mmask8) __U);
10779}
10780
10781extern __inline __m256i
10782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10783_mm256_rol_epi32 (__m256i __A, const int __B)
10784{
10785  return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10786						 (__v8si)
10787						 _mm256_setzero_si256 (),
10788						 (__mmask8) -1);
10789}
10790
10791extern __inline __m256i
10792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10793_mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10794		       const int __B)
10795{
10796  return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10797						 (__v8si) __W,
10798						 (__mmask8) __U);
10799}
10800
10801extern __inline __m256i
10802__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10803_mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
10804{
10805  return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10806						 (__v8si)
10807						 _mm256_setzero_si256 (),
10808						 (__mmask8) __U);
10809}
10810
10811extern __inline __m128i
10812__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10813_mm_rol_epi32 (__m128i __A, const int __B)
10814{
10815  return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10816						 (__v4si)
10817						 _mm_setzero_si128 (),
10818						 (__mmask8) -1);
10819}
10820
10821extern __inline __m128i
10822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10823_mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10824		    const int __B)
10825{
10826  return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10827						 (__v4si) __W,
10828						 (__mmask8) __U);
10829}
10830
10831extern __inline __m128i
10832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10833_mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
10834{
10835  return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10836						 (__v4si)
10837						 _mm_setzero_si128 (),
10838						 (__mmask8) __U);
10839}
10840
10841extern __inline __m256i
10842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10843_mm256_ror_epi32 (__m256i __A, const int __B)
10844{
10845  return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10846						 (__v8si)
10847						 _mm256_setzero_si256 (),
10848						 (__mmask8) -1);
10849}
10850
10851extern __inline __m256i
10852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10853_mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10854		       const int __B)
10855{
10856  return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10857						 (__v8si) __W,
10858						 (__mmask8) __U);
10859}
10860
10861extern __inline __m256i
10862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10863_mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
10864{
10865  return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10866						 (__v8si)
10867						 _mm256_setzero_si256 (),
10868						 (__mmask8) __U);
10869}
10870
10871extern __inline __m128i
10872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10873_mm_ror_epi32 (__m128i __A, const int __B)
10874{
10875  return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10876						 (__v4si)
10877						 _mm_setzero_si128 (),
10878						 (__mmask8) -1);
10879}
10880
10881extern __inline __m128i
10882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10883_mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10884		    const int __B)
10885{
10886  return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10887						 (__v4si) __W,
10888						 (__mmask8) __U);
10889}
10890
10891extern __inline __m128i
10892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10893_mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
10894{
10895  return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10896						 (__v4si)
10897						 _mm_setzero_si128 (),
10898						 (__mmask8) __U);
10899}
10900
10901extern __inline __m256i
10902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10903_mm256_rol_epi64 (__m256i __A, const int __B)
10904{
10905  return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10906						 (__v4di)
10907						 _mm256_setzero_si256 (),
10908						 (__mmask8) -1);
10909}
10910
10911extern __inline __m256i
10912__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10913_mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10914		       const int __B)
10915{
10916  return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10917						 (__v4di) __W,
10918						 (__mmask8) __U);
10919}
10920
10921extern __inline __m256i
10922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10923_mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
10924{
10925  return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10926						 (__v4di)
10927						 _mm256_setzero_si256 (),
10928						 (__mmask8) __U);
10929}
10930
10931extern __inline __m128i
10932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10933_mm_rol_epi64 (__m128i __A, const int __B)
10934{
10935  return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10936						 (__v2di)
10937						 _mm_setzero_di (),
10938						 (__mmask8) -1);
10939}
10940
10941extern __inline __m128i
10942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10943_mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10944		    const int __B)
10945{
10946  return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10947						 (__v2di) __W,
10948						 (__mmask8) __U);
10949}
10950
10951extern __inline __m128i
10952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10953_mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
10954{
10955  return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10956						 (__v2di)
10957						 _mm_setzero_di (),
10958						 (__mmask8) __U);
10959}
10960
10961extern __inline __m256i
10962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10963_mm256_ror_epi64 (__m256i __A, const int __B)
10964{
10965  return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10966						 (__v4di)
10967						 _mm256_setzero_si256 (),
10968						 (__mmask8) -1);
10969}
10970
10971extern __inline __m256i
10972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10973_mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10974		       const int __B)
10975{
10976  return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10977						 (__v4di) __W,
10978						 (__mmask8) __U);
10979}
10980
10981extern __inline __m256i
10982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10983_mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
10984{
10985  return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10986						 (__v4di)
10987						 _mm256_setzero_si256 (),
10988						 (__mmask8) __U);
10989}
10990
10991extern __inline __m128i
10992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10993_mm_ror_epi64 (__m128i __A, const int __B)
10994{
10995  return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
10996						 (__v2di)
10997						 _mm_setzero_di (),
10998						 (__mmask8) -1);
10999}
11000
11001extern __inline __m128i
11002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11003_mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11004		    const int __B)
11005{
11006  return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11007						 (__v2di) __W,
11008						 (__mmask8) __U);
11009}
11010
11011extern __inline __m128i
11012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11013_mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
11014{
11015  return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11016						 (__v2di)
11017						 _mm_setzero_di (),
11018						 (__mmask8) __U);
11019}
11020
11021extern __inline __m128i
11022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11023_mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
11024{
11025  return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11026						  (__v4si) __B, __imm,
11027						  (__v4si)
11028						  _mm_setzero_si128 (),
11029						  (__mmask8) -1);
11030}
11031
11032extern __inline __m128i
11033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11034_mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11035		       __m128i __B, const int __imm)
11036{
11037  return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11038						  (__v4si) __B, __imm,
11039						  (__v4si) __W,
11040						  (__mmask8) __U);
11041}
11042
11043extern __inline __m128i
11044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11045_mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
11046			const int __imm)
11047{
11048  return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11049						  (__v4si) __B, __imm,
11050						  (__v4si)
11051						  _mm_setzero_si128 (),
11052						  (__mmask8) __U);
11053}
11054
11055extern __inline __m128i
11056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11057_mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
11058{
11059  return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11060						  (__v2di) __B, __imm,
11061						  (__v2di)
11062						  _mm_setzero_di (),
11063						  (__mmask8) -1);
11064}
11065
11066extern __inline __m128i
11067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11068_mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11069		       __m128i __B, const int __imm)
11070{
11071  return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11072						  (__v2di) __B, __imm,
11073						  (__v2di) __W,
11074						  (__mmask8) __U);
11075}
11076
11077extern __inline __m128i
11078__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11079_mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
11080			const int __imm)
11081{
11082  return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11083						  (__v2di) __B, __imm,
11084						  (__v2di)
11085						  _mm_setzero_di (),
11086						  (__mmask8) __U);
11087}
11088
11089extern __inline __m256i
11090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11091_mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
11092{
11093  return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11094						  (__v8si) __B, __imm,
11095						  (__v8si)
11096						  _mm256_setzero_si256 (),
11097						  (__mmask8) -1);
11098}
11099
11100extern __inline __m256i
11101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11102_mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11103			  __m256i __B, const int __imm)
11104{
11105  return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11106						  (__v8si) __B, __imm,
11107						  (__v8si) __W,
11108						  (__mmask8) __U);
11109}
11110
11111extern __inline __m256i
11112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11113_mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
11114			   const int __imm)
11115{
11116  return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11117						  (__v8si) __B, __imm,
11118						  (__v8si)
11119						  _mm256_setzero_si256 (),
11120						  (__mmask8) __U);
11121}
11122
11123extern __inline __m256i
11124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11125_mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
11126{
11127  return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11128						  (__v4di) __B, __imm,
11129						  (__v4di)
11130						  _mm256_setzero_si256 (),
11131						  (__mmask8) -1);
11132}
11133
11134extern __inline __m256i
11135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11136_mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11137			  __m256i __B, const int __imm)
11138{
11139  return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11140						  (__v4di) __B, __imm,
11141						  (__v4di) __W,
11142						  (__mmask8) __U);
11143}
11144
11145extern __inline __m256i
11146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11147_mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
11148			   const int __imm)
11149{
11150  return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11151						  (__v4di) __B, __imm,
11152						  (__v4di)
11153						  _mm256_setzero_si256 (),
11154						  (__mmask8) __U);
11155}
11156
11157extern __inline __m128i
11158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11159_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
11160		   const int __I)
11161{
11162  return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11163						  (__v8hi) __W,
11164						  (__mmask8) __U);
11165}
11166
11167extern __inline __m128i
11168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11169_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
11170{
11171  return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11172						  (__v8hi)
11173						  _mm_setzero_hi (),
11174						  (__mmask8) __U);
11175}
11176
11177extern __inline __m128i
11178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11179_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
11180		      const int __I)
11181{
11182  return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11183						     (__v8hi) __W,
11184						     (__mmask8) __U);
11185}
11186
11187extern __inline __m128i
11188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11189_mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
11190{
11191  return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11192						     (__v8hi)
11193						     _mm_setzero_hi (),
11194						     (__mmask8) __U);
11195}
11196
11197extern __inline __m256i
11198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11199_mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11200			const int __imm)
11201{
11202  return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11203						  (__v8si) __W,
11204						  (__mmask8) __U);
11205}
11206
11207extern __inline __m256i
11208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11209_mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
11210{
11211  return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11212						  (__v8si)
11213						  _mm256_setzero_si256 (),
11214						  (__mmask8) __U);
11215}
11216
11217extern __inline __m128i
11218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11219_mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11220		     const int __imm)
11221{
11222  return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11223						  (__v4si) __W,
11224						  (__mmask8) __U);
11225}
11226
11227extern __inline __m128i
11228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11229_mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
11230{
11231  return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11232						  (__v4si)
11233						  _mm_setzero_si128 (),
11234						  (__mmask8) __U);
11235}
11236
11237extern __inline __m256i
11238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11239_mm256_srai_epi64 (__m256i __A, const int __imm)
11240{
11241  return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11242						  (__v4di)
11243						  _mm256_setzero_si256 (),
11244						  (__mmask8) -1);
11245}
11246
11247extern __inline __m256i
11248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11249_mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11250			const int __imm)
11251{
11252  return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11253						  (__v4di) __W,
11254						  (__mmask8) __U);
11255}
11256
11257extern __inline __m256i
11258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11259_mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
11260{
11261  return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11262						  (__v4di)
11263						  _mm256_setzero_si256 (),
11264						  (__mmask8) __U);
11265}
11266
11267extern __inline __m128i
11268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11269_mm_srai_epi64 (__m128i __A, const int __imm)
11270{
11271  return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11272						  (__v2di)
11273						  _mm_setzero_di (),
11274						  (__mmask8) -1);
11275}
11276
11277extern __inline __m128i
11278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11279_mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11280		     const int __imm)
11281{
11282  return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11283						  (__v2di) __W,
11284						  (__mmask8) __U);
11285}
11286
11287extern __inline __m128i
11288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11289_mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
11290{
11291  return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11292						  (__v2di)
11293						  _mm_setzero_si128 (),
11294						  (__mmask8) __U);
11295}
11296
11297extern __inline __m128i
11298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11299_mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11300{
11301  return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11302						  (__v4si) __W,
11303						  (__mmask8) __U);
11304}
11305
11306extern __inline __m128i
11307__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11308_mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
11309{
11310  return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11311						  (__v4si)
11312						  _mm_setzero_si128 (),
11313						  (__mmask8) __U);
11314}
11315
11316extern __inline __m128i
11317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11318_mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11319{
11320  return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11321						  (__v2di) __W,
11322						  (__mmask8) __U);
11323}
11324
11325extern __inline __m128i
11326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11327_mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
11328{
11329  return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11330						  (__v2di)
11331						  _mm_setzero_di (),
11332						  (__mmask8) __U);
11333}
11334
11335extern __inline __m256i
11336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11337_mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11338			int __B)
11339{
11340  return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11341						  (__v8si) __W,
11342						  (__mmask8) __U);
11343}
11344
11345extern __inline __m256i
11346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11347_mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
11348{
11349  return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11350						  (__v8si)
11351						  _mm256_setzero_si256 (),
11352						  (__mmask8) __U);
11353}
11354
11355extern __inline __m256i
11356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11357_mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11358			int __B)
11359{
11360  return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
11361						  (__v4di) __W,
11362						  (__mmask8) __U);
11363}
11364
11365extern __inline __m256i
11366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11367_mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
11368{
11369  return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
11370						  (__v4di)
11371						  _mm256_setzero_si256 (),
11372						  (__mmask8) __U);
11373}
11374
11375extern __inline __m256d
11376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11377_mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
11378			 const int __imm)
11379{
11380  return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
11381						  (__v4df) __W,
11382						  (__mmask8) __U);
11383}
11384
11385extern __inline __m256d
11386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11387_mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
11388{
11389  return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
11390						  (__v4df)
11391						  _mm256_setzero_pd (),
11392						  (__mmask8) __U);
11393}
11394
11395extern __inline __m256d
11396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11397_mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
11398			const int __C)
11399{
11400  return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
11401						     (__v4df) __W,
11402						     (__mmask8) __U);
11403}
11404
11405extern __inline __m256d
11406__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11407_mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
11408{
11409  return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
11410						     (__v4df)
11411						     _mm256_setzero_pd (),
11412						     (__mmask8) __U);
11413}
11414
11415extern __inline __m128d
11416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11417_mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
11418		     const int __C)
11419{
11420  return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
11421						  (__v2df) __W,
11422						  (__mmask8) __U);
11423}
11424
11425extern __inline __m128d
11426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11427_mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
11428{
11429  return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
11430						  (__v2df)
11431						  _mm_setzero_pd (),
11432						  (__mmask8) __U);
11433}
11434
11435extern __inline __m256
11436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11437_mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
11438			const int __C)
11439{
11440  return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
11441						    (__v8sf) __W,
11442						    (__mmask8) __U);
11443}
11444
11445extern __inline __m256
11446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11447_mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
11448{
11449  return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
11450						    (__v8sf)
11451						    _mm256_setzero_ps (),
11452						    (__mmask8) __U);
11453}
11454
11455extern __inline __m128
11456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11457_mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
11458		     const int __C)
11459{
11460  return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
11461						 (__v4sf) __W,
11462						 (__mmask8) __U);
11463}
11464
11465extern __inline __m128
11466__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11467_mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
11468{
11469  return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
11470						 (__v4sf)
11471						 _mm_setzero_ps (),
11472						 (__mmask8) __U);
11473}
11474
11475extern __inline __m256d
11476__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11477_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
11478{
11479  return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
11480						     (__v4df) __W,
11481						     (__mmask8) __U);
11482}
11483
11484extern __inline __m256
11485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11486_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
11487{
11488  return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
11489						    (__v8sf) __W,
11490						    (__mmask8) __U);
11491}
11492
11493extern __inline __m256i
11494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11495_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
11496{
11497  return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
11498						    (__v4di) __W,
11499						    (__mmask8) __U);
11500}
11501
11502extern __inline __m256i
11503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11504_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
11505{
11506  return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
11507						    (__v8si) __W,
11508						    (__mmask8) __U);
11509}
11510
11511extern __inline __m128d
11512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11513_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
11514{
11515  return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
11516						     (__v2df) __W,
11517						     (__mmask8) __U);
11518}
11519
11520extern __inline __m128
11521__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11522_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
11523{
11524  return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
11525						    (__v4sf) __W,
11526						    (__mmask8) __U);
11527}
11528
11529extern __inline __m128i
11530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11531_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
11532{
11533  return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
11534						    (__v2di) __W,
11535						    (__mmask8) __U);
11536}
11537
11538extern __inline __m128i
11539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11540_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
11541{
11542  return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
11543						    (__v4si) __W,
11544						    (__mmask8) __U);
11545}
11546
11547extern __inline __mmask8
11548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11549_mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
11550{
11551  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11552						 (__v4di) __Y, __P,
11553						 (__mmask8) -1);
11554}
11555
11556extern __inline __mmask8
11557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11558_mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
11559{
11560  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11561						 (__v8si) __Y, __P,
11562						 (__mmask8) -1);
11563}
11564
11565extern __inline __mmask8
11566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11567_mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
11568{
11569  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11570						  (__v4di) __Y, __P,
11571						  (__mmask8) -1);
11572}
11573
11574extern __inline __mmask8
11575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11576_mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
11577{
11578  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11579						  (__v8si) __Y, __P,
11580						  (__mmask8) -1);
11581}
11582
11583extern __inline __mmask8
11584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11585_mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
11586{
11587  return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
11588						  (__v4df) __Y, __P,
11589						  (__mmask8) -1);
11590}
11591
11592extern __inline __mmask8
11593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11594_mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
11595{
11596  return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
11597						  (__v8sf) __Y, __P,
11598						  (__mmask8) -1);
11599}
11600
11601extern __inline __mmask8
11602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11603_mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11604			    const int __P)
11605{
11606  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11607						 (__v4di) __Y, __P,
11608						 (__mmask8) __U);
11609}
11610
11611extern __inline __mmask8
11612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11613_mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11614			    const int __P)
11615{
11616  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11617						 (__v8si) __Y, __P,
11618						 (__mmask8) __U);
11619}
11620
11621extern __inline __mmask8
11622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11623_mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11624			    const int __P)
11625{
11626  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11627						  (__v4di) __Y, __P,
11628						  (__mmask8) __U);
11629}
11630
11631extern __inline __mmask8
11632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11633_mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11634			    const int __P)
11635{
11636  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11637						  (__v8si) __Y, __P,
11638						  (__mmask8) __U);
11639}
11640
11641extern __inline __mmask8
11642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11643_mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
11644			 const int __P)
11645{
11646  return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
11647						  (__v4df) __Y, __P,
11648						  (__mmask8) __U);
11649}
11650
11651extern __inline __mmask8
11652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11653_mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
11654			 const int __P)
11655{
11656  return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
11657						  (__v8sf) __Y, __P,
11658						  (__mmask8) __U);
11659}
11660
11661extern __inline __mmask8
11662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11663_mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
11664{
11665  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11666						 (__v2di) __Y, __P,
11667						 (__mmask8) -1);
11668}
11669
11670extern __inline __mmask8
11671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11672_mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
11673{
11674  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11675						 (__v4si) __Y, __P,
11676						 (__mmask8) -1);
11677}
11678
11679extern __inline __mmask8
11680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11681_mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
11682{
11683  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11684						  (__v2di) __Y, __P,
11685						  (__mmask8) -1);
11686}
11687
11688extern __inline __mmask8
11689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11690_mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
11691{
11692  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11693						  (__v4si) __Y, __P,
11694						  (__mmask8) -1);
11695}
11696
11697extern __inline __mmask8
11698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11699_mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
11700{
11701  return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
11702						  (__v2df) __Y, __P,
11703						  (__mmask8) -1);
11704}
11705
11706extern __inline __mmask8
11707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11708_mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
11709{
11710  return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
11711						  (__v4sf) __Y, __P,
11712						  (__mmask8) -1);
11713}
11714
11715extern __inline __mmask8
11716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11717_mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11718			 const int __P)
11719{
11720  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11721						 (__v2di) __Y, __P,
11722						 (__mmask8) __U);
11723}
11724
11725extern __inline __mmask8
11726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11727_mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11728			 const int __P)
11729{
11730  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11731						 (__v4si) __Y, __P,
11732						 (__mmask8) __U);
11733}
11734
11735extern __inline __mmask8
11736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11737_mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11738			 const int __P)
11739{
11740  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11741						  (__v2di) __Y, __P,
11742						  (__mmask8) __U);
11743}
11744
11745extern __inline __mmask8
11746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11747_mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11748			 const int __P)
11749{
11750  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11751						  (__v4si) __Y, __P,
11752						  (__mmask8) __U);
11753}
11754
11755extern __inline __mmask8
11756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11757_mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
11758		      const int __P)
11759{
11760  return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
11761						  (__v2df) __Y, __P,
11762						  (__mmask8) __U);
11763}
11764
11765extern __inline __mmask8
11766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11767_mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
11768		      const int __P)
11769{
11770  return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
11771						  (__v4sf) __Y, __P,
11772						  (__mmask8) __U);
11773}
11774
11775extern __inline __m256d
11776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11777_mm256_permutex_pd (__m256d __X, const int __M)
11778{
11779  return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
11780						  (__v4df)
11781						  _mm256_undefined_pd (),
11782						  (__mmask8) -1);
11783}
11784
11785extern __inline __mmask8
11786  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11787_mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11788{
11789  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11790						  (__v8si) __Y, 4,
11791						  (__mmask8) __M);
11792}
11793
11794extern __inline __mmask8
11795  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11796_mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
11797{
11798  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11799						  (__v8si) __Y, 4,
11800						  (__mmask8) - 1);
11801}
11802
11803extern __inline __mmask8
11804  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11805_mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11806{
11807  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11808						  (__v8si) __Y, 1,
11809						  (__mmask8) __M);
11810}
11811
11812extern __inline __mmask8
11813  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11814_mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
11815{
11816  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11817						  (__v8si) __Y, 1,
11818						  (__mmask8) - 1);
11819}
11820
11821extern __inline __mmask8
11822  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11823_mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11824{
11825  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11826						  (__v8si) __Y, 5,
11827						  (__mmask8) __M);
11828}
11829
11830extern __inline __mmask8
11831  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11832_mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
11833{
11834  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11835						  (__v8si) __Y, 5,
11836						  (__mmask8) - 1);
11837}
11838
11839extern __inline __mmask8
11840  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11841_mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11842{
11843  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11844						  (__v8si) __Y, 2,
11845						  (__mmask8) __M);
11846}
11847
11848extern __inline __mmask8
11849  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11850_mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
11851{
11852  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11853						  (__v8si) __Y, 2,
11854						  (__mmask8) - 1);
11855}
11856
11857extern __inline __mmask8
11858  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11859_mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11860{
11861  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11862						  (__v4di) __Y, 4,
11863						  (__mmask8) __M);
11864}
11865
11866extern __inline __mmask8
11867  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11868_mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
11869{
11870  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11871						  (__v4di) __Y, 4,
11872						  (__mmask8) - 1);
11873}
11874
11875extern __inline __mmask8
11876  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11877_mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11878{
11879  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11880						  (__v4di) __Y, 1,
11881						  (__mmask8) __M);
11882}
11883
11884extern __inline __mmask8
11885  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11886_mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
11887{
11888  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11889						  (__v4di) __Y, 1,
11890						  (__mmask8) - 1);
11891}
11892
11893extern __inline __mmask8
11894  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11895_mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11896{
11897  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11898						  (__v4di) __Y, 5,
11899						  (__mmask8) __M);
11900}
11901
11902extern __inline __mmask8
11903  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11904_mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
11905{
11906  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11907						  (__v4di) __Y, 5,
11908						  (__mmask8) - 1);
11909}
11910
11911extern __inline __mmask8
11912  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11913_mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11914{
11915  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11916						  (__v4di) __Y, 2,
11917						  (__mmask8) __M);
11918}
11919
11920extern __inline __mmask8
11921  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11922_mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
11923{
11924  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11925						  (__v4di) __Y, 2,
11926						  (__mmask8) - 1);
11927}
11928
11929extern __inline __mmask8
11930  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11931_mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11932{
11933  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11934						 (__v8si) __Y, 4,
11935						 (__mmask8) __M);
11936}
11937
11938extern __inline __mmask8
11939  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11940_mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
11941{
11942  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11943						 (__v8si) __Y, 4,
11944						 (__mmask8) - 1);
11945}
11946
11947extern __inline __mmask8
11948  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11949_mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11950{
11951  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11952						 (__v8si) __Y, 1,
11953						 (__mmask8) __M);
11954}
11955
11956extern __inline __mmask8
11957  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11958_mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
11959{
11960  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11961						 (__v8si) __Y, 1,
11962						 (__mmask8) - 1);
11963}
11964
11965extern __inline __mmask8
11966  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11967_mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11968{
11969  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11970						 (__v8si) __Y, 5,
11971						 (__mmask8) __M);
11972}
11973
11974extern __inline __mmask8
11975  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11976_mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
11977{
11978  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11979						 (__v8si) __Y, 5,
11980						 (__mmask8) - 1);
11981}
11982
11983extern __inline __mmask8
11984  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11985_mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11986{
11987  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11988						 (__v8si) __Y, 2,
11989						 (__mmask8) __M);
11990}
11991
11992extern __inline __mmask8
11993  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11994_mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
11995{
11996  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11997						 (__v8si) __Y, 2,
11998						 (__mmask8) - 1);
11999}
12000
12001extern __inline __mmask8
12002  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12003_mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12004{
12005  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12006						 (__v4di) __Y, 4,
12007						 (__mmask8) __M);
12008}
12009
12010extern __inline __mmask8
12011  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12012_mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
12013{
12014  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12015						 (__v4di) __Y, 4,
12016						 (__mmask8) - 1);
12017}
12018
12019extern __inline __mmask8
12020  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12021_mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12022{
12023  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12024						 (__v4di) __Y, 1,
12025						 (__mmask8) __M);
12026}
12027
12028extern __inline __mmask8
12029  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12030_mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
12031{
12032  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12033						 (__v4di) __Y, 1,
12034						 (__mmask8) - 1);
12035}
12036
12037extern __inline __mmask8
12038  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12039_mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12040{
12041  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12042						 (__v4di) __Y, 5,
12043						 (__mmask8) __M);
12044}
12045
12046extern __inline __mmask8
12047  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12048_mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
12049{
12050  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12051						 (__v4di) __Y, 5,
12052						 (__mmask8) - 1);
12053}
12054
12055extern __inline __mmask8
12056  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12057_mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12058{
12059  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12060						 (__v4di) __Y, 2,
12061						 (__mmask8) __M);
12062}
12063
12064extern __inline __mmask8
12065  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12066_mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
12067{
12068  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12069						 (__v4di) __Y, 2,
12070						 (__mmask8) - 1);
12071}
12072
12073extern __inline __mmask8
12074  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12075_mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12076{
12077  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12078						  (__v4si) __Y, 4,
12079						  (__mmask8) __M);
12080}
12081
12082extern __inline __mmask8
12083  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12084_mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
12085{
12086  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12087						  (__v4si) __Y, 4,
12088						  (__mmask8) - 1);
12089}
12090
12091extern __inline __mmask8
12092  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12093_mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12094{
12095  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12096						  (__v4si) __Y, 1,
12097						  (__mmask8) __M);
12098}
12099
12100extern __inline __mmask8
12101  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12102_mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
12103{
12104  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12105						  (__v4si) __Y, 1,
12106						  (__mmask8) - 1);
12107}
12108
12109extern __inline __mmask8
12110  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12111_mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12112{
12113  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12114						  (__v4si) __Y, 5,
12115						  (__mmask8) __M);
12116}
12117
12118extern __inline __mmask8
12119  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12120_mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
12121{
12122  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12123						  (__v4si) __Y, 5,
12124						  (__mmask8) - 1);
12125}
12126
12127extern __inline __mmask8
12128  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12129_mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12130{
12131  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12132						  (__v4si) __Y, 2,
12133						  (__mmask8) __M);
12134}
12135
12136extern __inline __mmask8
12137  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12138_mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
12139{
12140  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12141						  (__v4si) __Y, 2,
12142						  (__mmask8) - 1);
12143}
12144
12145extern __inline __mmask8
12146  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12147_mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12148{
12149  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12150						  (__v2di) __Y, 4,
12151						  (__mmask8) __M);
12152}
12153
12154extern __inline __mmask8
12155  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12156_mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
12157{
12158  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12159						  (__v2di) __Y, 4,
12160						  (__mmask8) - 1);
12161}
12162
12163extern __inline __mmask8
12164  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12165_mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12166{
12167  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12168						  (__v2di) __Y, 1,
12169						  (__mmask8) __M);
12170}
12171
12172extern __inline __mmask8
12173  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12174_mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
12175{
12176  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12177						  (__v2di) __Y, 1,
12178						  (__mmask8) - 1);
12179}
12180
12181extern __inline __mmask8
12182  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12183_mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12184{
12185  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12186						  (__v2di) __Y, 5,
12187						  (__mmask8) __M);
12188}
12189
12190extern __inline __mmask8
12191  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12192_mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
12193{
12194  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12195						  (__v2di) __Y, 5,
12196						  (__mmask8) - 1);
12197}
12198
12199extern __inline __mmask8
12200  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12201_mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12202{
12203  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12204						  (__v2di) __Y, 2,
12205						  (__mmask8) __M);
12206}
12207
12208extern __inline __mmask8
12209  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12210_mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
12211{
12212  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12213						  (__v2di) __Y, 2,
12214						  (__mmask8) - 1);
12215}
12216
12217extern __inline __mmask8
12218  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12219_mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12220{
12221  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12222						 (__v4si) __Y, 4,
12223						 (__mmask8) __M);
12224}
12225
12226extern __inline __mmask8
12227  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12228_mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
12229{
12230  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12231						 (__v4si) __Y, 4,
12232						 (__mmask8) - 1);
12233}
12234
12235extern __inline __mmask8
12236  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12237_mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12238{
12239  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12240						 (__v4si) __Y, 1,
12241						 (__mmask8) __M);
12242}
12243
12244extern __inline __mmask8
12245  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12246_mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
12247{
12248  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12249						 (__v4si) __Y, 1,
12250						 (__mmask8) - 1);
12251}
12252
12253extern __inline __mmask8
12254  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12255_mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12256{
12257  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12258						 (__v4si) __Y, 5,
12259						 (__mmask8) __M);
12260}
12261
12262extern __inline __mmask8
12263  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12264_mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
12265{
12266  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12267						 (__v4si) __Y, 5,
12268						 (__mmask8) - 1);
12269}
12270
12271extern __inline __mmask8
12272  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12273_mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12274{
12275  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12276						 (__v4si) __Y, 2,
12277						 (__mmask8) __M);
12278}
12279
12280extern __inline __mmask8
12281  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12282_mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
12283{
12284  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12285						 (__v4si) __Y, 2,
12286						 (__mmask8) - 1);
12287}
12288
12289extern __inline __mmask8
12290  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12291_mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12292{
12293  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12294						 (__v2di) __Y, 4,
12295						 (__mmask8) __M);
12296}
12297
12298extern __inline __mmask8
12299  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12300_mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
12301{
12302  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12303						 (__v2di) __Y, 4,
12304						 (__mmask8) - 1);
12305}
12306
12307extern __inline __mmask8
12308  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12309_mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12310{
12311  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12312						 (__v2di) __Y, 1,
12313						 (__mmask8) __M);
12314}
12315
12316extern __inline __mmask8
12317  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12318_mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
12319{
12320  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12321						 (__v2di) __Y, 1,
12322						 (__mmask8) - 1);
12323}
12324
12325extern __inline __mmask8
12326  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12327_mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12328{
12329  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12330						 (__v2di) __Y, 5,
12331						 (__mmask8) __M);
12332}
12333
12334extern __inline __mmask8
12335  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12336_mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
12337{
12338  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12339						 (__v2di) __Y, 5,
12340						 (__mmask8) - 1);
12341}
12342
12343extern __inline __mmask8
12344  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12345_mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12346{
12347  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12348						 (__v2di) __Y, 2,
12349						 (__mmask8) __M);
12350}
12351
12352extern __inline __mmask8
12353  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12354_mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
12355{
12356  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12357						 (__v2di) __Y, 2,
12358						 (__mmask8) - 1);
12359}
12360
12361#else
/* Macro form: expands to the permdf256 mask builtin applied to X with
   immediate M, _mm256_undefined_pd () as the third operand and
   (__mmask8) -1 as the mask operand.  */
#define _mm256_permutex_pd(X, M) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
     (__v4df) (__m256d) (X), \
     (int) (M), \
     (__v4df) (__m256d) _mm256_undefined_pd (), \
     (__mmask8) -1))
12366
/* Macro form: expands to the permdi256 mask builtin applied to X with
   immediate I, _mm256_setzero_si256 () as the third operand and M as
   the mask operand.  */
#define _mm256_maskz_permutex_epi64(M, X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
     (__v4di) (__m256i) (X), \
     (int) (I), \
     (__v4di) (__m256i) (_mm256_setzero_si256 ()), \
     (__mmask8) (M)))
12373
/* Macro form: expands to the permdi256 mask builtin applied to X with
   immediate I, W as the third operand and M as the mask operand.  */
#define _mm256_mask_permutex_epi64(W, M, X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
     (__v4di) (__m256i) (X), \
     (int) (I), \
     (__v4di) (__m256i) (W), \
     (__mmask8) (M)))
12379
/* Macro form: expands to the insertf32x4_256 mask builtin on X and Y
   with immediate C, _mm256_setzero_ps () as the fourth operand and
   (__mmask8) -1 as the mask operand.  */
#define _mm256_insertf32x4(X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
     (__v8sf) (__m256) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (C), \
     (__v8sf) (__m256) _mm256_setzero_ps (), \
     (__mmask8) -1))
12385
/* Macro form: expands to the insertf32x4_256 mask builtin on X and Y
   with immediate C, W as the fourth operand and U as the mask
   operand.  */
#define _mm256_mask_insertf32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
     (__v8sf) (__m256) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (C), \
     (__v8sf) (__m256) (W), \
     (__mmask8) (U)))
12391
/* Macro form: expands to the insertf32x4_256 mask builtin on X and Y
   with immediate C, _mm256_setzero_ps () as the fourth operand and U
   as the mask operand.  */
#define _mm256_maskz_insertf32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
     (__v8sf) (__m256) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (C), \
     (__v8sf) (__m256) _mm256_setzero_ps (), \
     (__mmask8) (U)))
12397
/* Macro form: expands to the inserti32x4_256 mask builtin on X and Y
   with immediate C, _mm256_setzero_si256 () as the fourth operand and
   (__mmask8) -1 as the mask operand.  */
#define _mm256_inserti32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
     (__v8si) (__m256i) (X), \
     (__v4si) (__m128i) (Y), \
     (int) (C), \
     (__v8si) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) -1))
12403
/* Macro form: expands to the inserti32x4_256 mask builtin on X and Y
   with immediate C, W as the fourth operand and U as the mask
   operand.  */
#define _mm256_mask_inserti32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
     (__v8si) (__m256i) (X), \
     (__v4si) (__m128i) (Y), \
     (int) (C), \
     (__v8si) (__m256i) (W), \
     (__mmask8) (U)))
12409
12410#define _mm256_maskz_inserti32x4(U, X, Y, C)                            \
12411  ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12412    (__v4si)(__m128i) (Y), (int) (C),					\
12413    (__v8si)(__m256i)_mm256_setzero_si256(),				\
12414    (__mmask8)(U)))
12415
/* 256-bit extractf32x4_ps macros (plain, mask, maskz).  Each expands to
   __builtin_ia32_extractf32x4_256_mask with X as the __v8sf source and
   C as the int immediate, yielding an __m128.  The plain form passes
   _mm_setzero_ps () and (__mmask8) -1, the mask form passes W and U,
   and the maskz form passes _mm_setzero_ps () and U.  */
#define _mm256_extractf32x4_ps(X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
    (__v8sf)(__m256) (X), \
    (int) (C), \
    (__v4sf)(__m128) _mm_setzero_ps (), \
    (__mmask8) -1))

#define _mm256_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
    (__v8sf)(__m256) (X), \
    (int) (C), \
    (__v4sf)(__m128) (W), \
    (__mmask8) (U)))

#define _mm256_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
    (__v8sf)(__m256) (X), \
    (int) (C), \
    (__v4sf)(__m128) _mm_setzero_ps (), \
    (__mmask8) (U)))
12433
/* 256-bit extracti32x4_epi32 macros (plain, mask, maskz).  Each expands
   to __builtin_ia32_extracti32x4_256_mask with X as the __v8si source
   and C as the int immediate, yielding an __m128i.  The plain form
   passes _mm_setzero_si128 () and (__mmask8) -1, the mask form passes
   W and U, and the maskz form passes _mm_setzero_si128 () and U.  */
#define _mm256_extracti32x4_epi32(X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
    (__v8si)(__m256i) (X), \
    (int) (C), \
    (__v4si)(__m128i) _mm_setzero_si128 (), \
    (__mmask8) -1))

#define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
    (__v8si)(__m256i) (X), \
    (int) (C), \
    (__v4si)(__m128i) (W), \
    (__mmask8) (U)))

#define _mm256_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
    (__v8si)(__m256i) (X), \
    (int) (C), \
    (__v4si)(__m128i) _mm_setzero_si128 (), \
    (__mmask8) (U)))
12445
/* 256-bit shuffle_i64x2 macros (plain, mask, maskz).  Each expands to
   __builtin_ia32_shuf_i64x2_256_mask with X and Y as __v4di operands
   and C as the int immediate.  The plain form passes
   _mm256_setzero_si256 () and (__mmask8) -1, the mask form passes W
   and U, and the maskz form passes _mm256_setzero_si256 () and U.  */
#define _mm256_shuffle_i64x2(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
    (__v4di)(__m256i) (X), \
    (__v4di)(__m256i) (Y), \
    (int) (C), \
    (__v4di)(__m256i) _mm256_setzero_si256 (), \
    (__mmask8) -1))

#define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
    (__v4di)(__m256i) (X), \
    (__v4di)(__m256i) (Y), \
    (int) (C), \
    (__v4di)(__m256i) (W), \
    (__mmask8) (U)))

#define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
    (__v4di)(__m256i) (X), \
    (__v4di)(__m256i) (Y), \
    (int) (C), \
    (__v4di)(__m256i) _mm256_setzero_si256 (), \
    (__mmask8) (U)))
12463
/* 256-bit shuffle_i32x4 macros (plain, mask, maskz).  Each expands to
   __builtin_ia32_shuf_i32x4_256_mask with X and Y as __v8si operands
   and C as the int immediate.  The plain form passes
   _mm256_setzero_si256 () and (__mmask8) -1, the mask form passes W
   and U, and the maskz form passes _mm256_setzero_si256 () and U.  */
#define _mm256_shuffle_i32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
    (__v8si)(__m256i) (X), \
    (__v8si)(__m256i) (Y), \
    (int) (C), \
    (__v8si)(__m256i) _mm256_setzero_si256 (), \
    (__mmask8) -1))

#define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
    (__v8si)(__m256i) (X), \
    (__v8si)(__m256i) (Y), \
    (int) (C), \
    (__v8si)(__m256i) (W), \
    (__mmask8) (U)))

#define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
    (__v8si)(__m256i) (X), \
    (__v8si)(__m256i) (Y), \
    (int) (C), \
    (__v8si)(__m256i) _mm256_setzero_si256 (), \
    (__mmask8) (U)))
12481
/* 256-bit shuffle_f64x2 macros (plain, mask, maskz).  Each expands to
   __builtin_ia32_shuf_f64x2_256_mask with X and Y as __v4df operands
   and C as the int immediate.  The plain form passes
   _mm256_setzero_pd () and (__mmask8) -1, the mask form passes W and
   U, and the maskz form passes _mm256_setzero_pd () and U.  */
#define _mm256_shuffle_f64x2(X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
    (__v4df)(__m256d) (X), \
    (__v4df)(__m256d) (Y), \
    (int) (C), \
    (__v4df)(__m256d) _mm256_setzero_pd (), \
    (__mmask8) -1))

#define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
    (__v4df)(__m256d) (X), \
    (__v4df)(__m256d) (Y), \
    (int) (C), \
    (__v4df)(__m256d) (W), \
    (__mmask8) (U)))

#define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
    (__v4df)(__m256d) (X), \
    (__v4df)(__m256d) (Y), \
    (int) (C), \
    (__v4df)(__m256d) _mm256_setzero_pd (), \
    (__mmask8) (U)))
12499
/* 256-bit shuffle_f32x4 macros (plain, mask, maskz).  Each expands to
   __builtin_ia32_shuf_f32x4_256_mask with X and Y as __v8sf operands
   and C as the int immediate.  The plain form passes
   _mm256_setzero_ps () and (__mmask8) -1, the mask form passes W and
   U, and the maskz form passes _mm256_setzero_ps () and U.  */
#define _mm256_shuffle_f32x4(X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
    (__v8sf)(__m256) (X), \
    (__v8sf)(__m256) (Y), \
    (int) (C), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8) -1))

#define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
    (__v8sf)(__m256) (X), \
    (__v8sf)(__m256) (Y), \
    (int) (C), \
    (__v8sf)(__m256) (W), \
    (__mmask8) (U)))

#define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
    (__v8sf)(__m256) (X), \
    (__v8sf)(__m256) (Y), \
    (int) (C), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8) (U)))
12517
/* Masked shuffle_pd macros for 256-bit and 128-bit vectors.  The
   256-bit forms expand to __builtin_ia32_shufpd256_mask and the 128-bit
   forms to __builtin_ia32_shufpd128_mask, with A and B as the vector
   operands, C as the int immediate and U as the __mmask8 operand.  The
   mask forms pass W as the fourth operand; the maskz forms pass a
   setzero vector of the matching width.  */
#define _mm256_mask_shuffle_pd(W, U, A, B, C) \
  ((__m256d) __builtin_ia32_shufpd256_mask ( \
    (__v4df)(__m256d) (A), \
    (__v4df)(__m256d) (B), \
    (int) (C), \
    (__v4df)(__m256d) (W), \
    (__mmask8) (U)))

#define _mm256_maskz_shuffle_pd(U, A, B, C) \
  ((__m256d) __builtin_ia32_shufpd256_mask ( \
    (__v4df)(__m256d) (A), \
    (__v4df)(__m256d) (B), \
    (int) (C), \
    (__v4df)(__m256d) _mm256_setzero_pd (), \
    (__mmask8) (U)))

#define _mm_mask_shuffle_pd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_shufpd128_mask ( \
    (__v2df)(__m128d) (A), \
    (__v2df)(__m128d) (B), \
    (int) (C), \
    (__v2df)(__m128d) (W), \
    (__mmask8) (U)))

#define _mm_maskz_shuffle_pd(U, A, B, C) \
  ((__m128d) __builtin_ia32_shufpd128_mask ( \
    (__v2df)(__m128d) (A), \
    (__v2df)(__m128d) (B), \
    (int) (C), \
    (__v2df)(__m128d) _mm_setzero_pd (), \
    (__mmask8) (U)))
12541
/* Masked shuffle_ps macros for 256-bit and 128-bit vectors.  The
   256-bit forms expand to __builtin_ia32_shufps256_mask and the 128-bit
   forms to __builtin_ia32_shufps128_mask, with A and B as the vector
   operands, C as the int immediate and U as the __mmask8 operand.  The
   mask forms pass W as the fourth operand; the maskz forms pass a
   setzero vector of the matching width.  */
#define _mm256_mask_shuffle_ps(W, U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ( \
    (__v8sf)(__m256) (A), \
    (__v8sf)(__m256) (B), \
    (int) (C), \
    (__v8sf)(__m256) (W), \
    (__mmask8) (U)))

#define _mm256_maskz_shuffle_ps(U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ( \
    (__v8sf)(__m256) (A), \
    (__v8sf)(__m256) (B), \
    (int) (C), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8) (U)))

#define _mm_mask_shuffle_ps(W, U, A, B, C) \
  ((__m128) __builtin_ia32_shufps128_mask ( \
    (__v4sf)(__m128) (A), \
    (__v4sf)(__m128) (B), \
    (int) (C), \
    (__v4sf)(__m128) (W), \
    (__mmask8) (U)))

#define _mm_maskz_shuffle_ps(U, A, B, C) \
  ((__m128) __builtin_ia32_shufps128_mask ( \
    (__v4sf)(__m128) (A), \
    (__v4sf)(__m128) (B), \
    (int) (C), \
    (__v4sf)(__m128) _mm_setzero_ps (), \
    (__mmask8) (U)))
12565
/* 256-bit fixupimm_pd macros (plain, mask, maskz).  X and Y are passed
   as __v4df, Z as __v4di and C as the int immediate.  The plain and
   mask forms expand to __builtin_ia32_fixupimmpd256_mask (with
   (__mmask8) (-1) and U respectively); the maskz form expands to
   __builtin_ia32_fixupimmpd256_maskz with U.  */
#define _mm256_fixupimm_pd(X, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_mask ( \
    (__v4df)(__m256d) (X), \
    (__v4df)(__m256d) (Y), \
    (__v4di)(__m256i) (Z), \
    (int) (C), \
    (__mmask8) (-1)))

#define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_mask ( \
    (__v4df)(__m256d) (X), \
    (__v4df)(__m256d) (Y), \
    (__v4di)(__m256i) (Z), \
    (int) (C), \
    (__mmask8) (U)))

#define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_maskz ( \
    (__v4df)(__m256d) (X), \
    (__v4df)(__m256d) (Y), \
    (__v4di)(__m256i) (Z), \
    (int) (C), \
    (__mmask8) (U)))
12583
/* 256-bit fixupimm_ps macros (plain, mask, maskz).  X and Y are passed
   as __v8sf, Z as __v8si and C as the int immediate.  The plain and
   mask forms expand to __builtin_ia32_fixupimmps256_mask (with
   (__mmask8) (-1) and U respectively); the maskz form expands to
   __builtin_ia32_fixupimmps256_maskz with U.  */
#define _mm256_fixupimm_ps(X, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_mask ( \
    (__v8sf)(__m256) (X), \
    (__v8sf)(__m256) (Y), \
    (__v8si)(__m256i) (Z), \
    (int) (C), \
    (__mmask8) (-1)))

#define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_mask ( \
    (__v8sf)(__m256) (X), \
    (__v8sf)(__m256) (Y), \
    (__v8si)(__m256i) (Z), \
    (int) (C), \
    (__mmask8) (U)))

#define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_maskz ( \
    (__v8sf)(__m256) (X), \
    (__v8sf)(__m256) (Y), \
    (__v8si)(__m256i) (Z), \
    (int) (C), \
    (__mmask8) (U)))
12602
/* 128-bit fixupimm_pd macros (plain, mask, maskz).  X and Y are passed
   as __v2df, Z as __v2di and C as the int immediate.  The plain and
   mask forms expand to __builtin_ia32_fixupimmpd128_mask (with
   (__mmask8) (-1) and U respectively); the maskz form expands to
   __builtin_ia32_fixupimmpd128_maskz with U.  */
#define _mm_fixupimm_pd(X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_mask ( \
    (__v2df)(__m128d) (X), \
    (__v2df)(__m128d) (Y), \
    (__v2di)(__m128i) (Z), \
    (int) (C), \
    (__mmask8) (-1)))

#define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_mask ( \
    (__v2df)(__m128d) (X), \
    (__v2df)(__m128d) (Y), \
    (__v2di)(__m128i) (Z), \
    (int) (C), \
    (__mmask8) (U)))

#define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_maskz ( \
    (__v2df)(__m128d) (X), \
    (__v2df)(__m128d) (Y), \
    (__v2di)(__m128i) (Z), \
    (int) (C), \
    (__mmask8) (U)))
12621
/* 128-bit fixupimm_ps macros (plain, mask, maskz).  X and Y are passed
   as __v4sf, Z as __v4si and C as the int immediate.  The plain and
   mask forms expand to __builtin_ia32_fixupimmps128_mask (with
   (__mmask8) (-1) and U respectively); the maskz form expands to
   __builtin_ia32_fixupimmps128_maskz with U.  */
#define _mm_fixupimm_ps(X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_mask ( \
    (__v4sf)(__m128) (X), \
    (__v4sf)(__m128) (Y), \
    (__v4si)(__m128i) (Z), \
    (int) (C), \
    (__mmask8) (-1)))

#define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_mask ( \
    (__v4sf)(__m128) (X), \
    (__v4sf)(__m128) (Y), \
    (__v4si)(__m128i) (Z), \
    (int) (C), \
    (__mmask8) (U)))

#define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_maskz ( \
    (__v4sf)(__m128) (X), \
    (__v4sf)(__m128) (Y), \
    (__v4si)(__m128i) (Z), \
    (int) (C), \
    (__mmask8) (U)))
12639
/* Masked srli_epi32 macros.  The 256-bit forms expand to
   __builtin_ia32_psrldi256_mask and the 128-bit forms to
   __builtin_ia32_psrldi128_mask, with A as the source vector, B as the
   int count and U as the __mmask8 operand.  The mask forms pass W as
   the third operand; the maskz forms pass a setzero vector.  */
#define _mm256_mask_srli_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
    (__v8si)(__m256i) (A), \
    (int) (B), \
    (__v8si)(__m256i) (W), \
    (__mmask8) (U)))

#define _mm256_maskz_srli_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
    (__v8si)(__m256i) (A), \
    (int) (B), \
    (__v8si) _mm256_setzero_si256 (), \
    (__mmask8) (U)))

#define _mm_mask_srli_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
    (__v4si)(__m128i) (A), \
    (int) (B), \
    (__v4si)(__m128i) (W), \
    (__mmask8) (U)))

#define _mm_maskz_srli_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
    (__v4si)(__m128i) (A), \
    (int) (B), \
    (__v4si) _mm_setzero_si128 (), \
    (__mmask8) (U)))
12655
/* Masked srli_epi64 macros.  The 256-bit forms expand to
   __builtin_ia32_psrlqi256_mask and the 128-bit forms to
   __builtin_ia32_psrlqi128_mask, with A as the source vector, B as the
   int count and U as the __mmask8 operand.  The mask forms pass W as
   the third operand; the maskz forms pass a setzero vector.  */
#define _mm256_mask_srli_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
    (__v4di)(__m256i) (A), \
    (int) (B), \
    (__v4di)(__m256i) (W), \
    (__mmask8) (U)))

#define _mm256_maskz_srli_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
    (__v4di)(__m256i) (A), \
    (int) (B), \
    (__v4di) _mm256_setzero_si256 (), \
    (__mmask8) (U)))

#define _mm_mask_srli_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
    (__v2di)(__m128i) (A), \
    (int) (B), \
    (__v2di)(__m128i) (W), \
    (__mmask8) (U)))

#define _mm_maskz_srli_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
    (__v2di)(__m128i) (A), \
    (int) (B), \
    (__v2di) _mm_setzero_si128 (), \
    (__mmask8) (U)))
12671
/* Masked 256-bit slli macros.  The epi32 forms expand to
   __builtin_ia32_pslldi256_mask and the epi64 forms to
   __builtin_ia32_psllqi256_mask, with X as the source vector, C as the
   int count and U as the __mmask8 operand.  The mask forms pass W as
   the third operand; the maskz forms pass _mm256_setzero_si256 ().  */
#define _mm256_mask_slli_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pslldi256_mask ( \
    (__v8si)(__m256i) (X), \
    (int) (C), \
    (__v8si)(__m256i) (W), \
    (__mmask8) (U)))

#define _mm256_maskz_slli_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pslldi256_mask ( \
    (__v8si)(__m256i) (X), \
    (int) (C), \
    (__v8si)(__m256i) _mm256_setzero_si256 (), \
    (__mmask8) (U)))

#define _mm256_mask_slli_epi64(W, U, X, C) \
  ((__m256i) __builtin_ia32_psllqi256_mask ( \
    (__v4di)(__m256i) (X), \
    (int) (C), \
    (__v4di)(__m256i) (W), \
    (__mmask8) (U)))

#define _mm256_maskz_slli_epi64(U, X, C) \
  ((__m256i) __builtin_ia32_psllqi256_mask ( \
    (__v4di)(__m256i) (X), \
    (int) (C), \
    (__v4di)(__m256i) _mm256_setzero_si256 (), \
    (__mmask8) (U)))
12691
/* Masked 128-bit slli_epi32 macros.  Both expand to
   __builtin_ia32_pslldi128_mask with X as the __v4si source, C as the
   int count and U as the __mmask8 operand.  The mask form passes W as
   the third operand; the maskz form passes _mm_setzero_si128 ().  */
#define _mm_mask_slli_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pslldi128_mask ( \
    (__v4si)(__m128i) (X), \
    (int) (C), \
    (__v4si)(__m128i) (W), \
    (__mmask8) (U)))

#define _mm_maskz_slli_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pslldi128_mask ( \
    (__v4si)(__m128i) (X), \
    (int) (C), \
    (__v4si)(__m128i) _mm_setzero_si128 (), \
    (__mmask8) (U)))
12701
/* Masked 128-bit slli_epi64 macros.  Both expand to
   __builtin_ia32_psllqi128_mask with X as the __v2di source, C as the
   int count and U as the __mmask8 operand.  The mask form passes W as
   the third operand; the maskz form passes an all-zero __m128i.

   The zero vector is spelled _mm_setzero_si128 (), matching the other
   128-bit integer maskz macros in this file, rather than the
   file-local _mm_setzero_di () helper; both yield a zeroed __m128i.  */
#define _mm_mask_slli_epi64(W, U, X, C) \
  ((__m128i) __builtin_ia32_psllqi128_mask ( \
    (__v2di)(__m128i) (X), \
    (int) (C), \
    (__v2di)(__m128i) (W), \
    (__mmask8) (U)))

#define _mm_maskz_slli_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_psllqi128_mask ( \
    (__v2di)(__m128i) (X), \
    (int) (C), \
    (__v2di)(__m128i) _mm_setzero_si128 (), \
    (__mmask8) (U)))
12711
/* 256-bit ternarylogic_epi64 macros (plain, mask, maskz).  A, B and C
   are passed as __v4di and I as the int immediate.  The plain and mask
   forms expand to __builtin_ia32_pternlogq256_mask (with (__mmask8) -1
   and U respectively); the maskz form expands to
   __builtin_ia32_pternlogq256_maskz with U.  */
#define _mm256_ternarylogic_epi64(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
    (__v4di)(__m256i) (A), \
    (__v4di)(__m256i) (B), \
    (__v4di)(__m256i) (C), \
    (int) (I), \
    (__mmask8) -1))

#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
    (__v4di)(__m256i) (A), \
    (__v4di)(__m256i) (B), \
    (__v4di)(__m256i) (C), \
    (int) (I), \
    (__mmask8) (U)))

#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_maskz ( \
    (__v4di)(__m256i) (A), \
    (__v4di)(__m256i) (B), \
    (__v4di)(__m256i) (C), \
    (int) (I), \
    (__mmask8) (U)))
12723
/* 256-bit ternarylogic_epi32 macros (plain, mask, maskz).  A, B and C
   are passed as __v8si and I as the int immediate.  The plain and mask
   forms expand to __builtin_ia32_pternlogd256_mask (with (__mmask8) -1
   and U respectively); the maskz form expands to
   __builtin_ia32_pternlogd256_maskz with U.  */
#define _mm256_ternarylogic_epi32(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
    (__v8si)(__m256i) (A), \
    (__v8si)(__m256i) (B), \
    (__v8si)(__m256i) (C), \
    (int) (I), \
    (__mmask8) -1))

#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
    (__v8si)(__m256i) (A), \
    (__v8si)(__m256i) (B), \
    (__v8si)(__m256i) (C), \
    (int) (I), \
    (__mmask8) (U)))

#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_maskz ( \
    (__v8si)(__m256i) (A), \
    (__v8si)(__m256i) (B), \
    (__v8si)(__m256i) (C), \
    (int) (I), \
    (__mmask8) (U)))
12735
/* 128-bit ternarylogic_epi64 macros (plain, mask, maskz).  A, B and C
   are passed as __v2di and I as the int immediate.  The plain and mask
   forms expand to __builtin_ia32_pternlogq128_mask (with (__mmask8) -1
   and U respectively); the maskz form expands to
   __builtin_ia32_pternlogq128_maskz with U.  */
#define _mm_ternarylogic_epi64(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
    (__v2di)(__m128i) (A), \
    (__v2di)(__m128i) (B), \
    (__v2di)(__m128i) (C), \
    (int) (I), \
    (__mmask8) -1))

#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
    (__v2di)(__m128i) (A), \
    (__v2di)(__m128i) (B), \
    (__v2di)(__m128i) (C), \
    (int) (I), \
    (__mmask8) (U)))

#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_maskz ( \
    (__v2di)(__m128i) (A), \
    (__v2di)(__m128i) (B), \
    (__v2di)(__m128i) (C), \
    (int) (I), \
    (__mmask8) (U)))
12747
/* 128-bit ternarylogic_epi32 macros (plain, mask, maskz).  A, B and C
   are passed as __v4si and I as the int immediate.  The plain and mask
   forms expand to __builtin_ia32_pternlogd128_mask (with (__mmask8) -1
   and U respectively); the maskz form expands to
   __builtin_ia32_pternlogd128_maskz with U.  */
#define _mm_ternarylogic_epi32(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
    (__v4si)(__m128i) (A), \
    (__v4si)(__m128i) (B), \
    (__v4si)(__m128i) (C), \
    (int) (I), \
    (__mmask8) -1))

#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
    (__v4si)(__m128i) (A), \
    (__v4si)(__m128i) (B), \
    (__v4si)(__m128i) (C), \
    (int) (I), \
    (__mmask8) (U)))

#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_maskz ( \
    (__v4si)(__m128i) (A), \
    (__v4si)(__m128i) (B), \
    (__v4si)(__m128i) (C), \
    (int) (I), \
    (__mmask8) (U)))
12759
/* 256-bit roundscale_ps macros (plain, mask, maskz).  Each expands to
   __builtin_ia32_rndscaleps_256_mask with A as the __v8sf source and B
   as the int immediate.  The plain form passes _mm256_setzero_ps () and
   (__mmask8) -1, the mask form passes W and U, and the maskz form
   passes _mm256_setzero_ps () and U.  */
#define _mm256_roundscale_ps(A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
    (__v8sf)(__m256) (A), \
    (int) (B), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8) -1))

#define _mm256_mask_roundscale_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
    (__v8sf)(__m256) (A), \
    (int) (B), \
    (__v8sf)(__m256) (W), \
    (__mmask8) (U)))

#define _mm256_maskz_roundscale_ps(U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
    (__v8sf)(__m256) (A), \
    (int) (B), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8) (U)))
12771
/* 256-bit roundscale_pd macros (plain, mask, maskz).  Each expands to
   __builtin_ia32_rndscalepd_256_mask with A as the __v4df source and B
   as the int immediate.  The plain form passes _mm256_setzero_pd () and
   (__mmask8) -1, the mask form passes W and U, and the maskz form
   passes _mm256_setzero_pd () and U.  */
#define _mm256_roundscale_pd(A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
    (__v4df)(__m256d) (A), \
    (int) (B), \
    (__v4df)(__m256d) _mm256_setzero_pd (), \
    (__mmask8) -1))

#define _mm256_mask_roundscale_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
    (__v4df)(__m256d) (A), \
    (int) (B), \
    (__v4df)(__m256d) (W), \
    (__mmask8) (U)))

#define _mm256_maskz_roundscale_pd(U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
    (__v4df)(__m256d) (A), \
    (int) (B), \
    (__v4df)(__m256d) _mm256_setzero_pd (), \
    (__mmask8) (U)))
12783
/* 128-bit roundscale_ps macros (plain, mask, maskz).  Each expands to
   __builtin_ia32_rndscaleps_128_mask with A as the __v4sf source and B
   as the int immediate.  The plain form passes _mm_setzero_ps () and
   (__mmask8) -1, the mask form passes W and U, and the maskz form
   passes _mm_setzero_ps () and U.  */
#define _mm_roundscale_ps(A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
    (__v4sf)(__m128) (A), \
    (int) (B), \
    (__v4sf)(__m128) _mm_setzero_ps (), \
    (__mmask8) -1))

#define _mm_mask_roundscale_ps(W, U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
    (__v4sf)(__m128) (A), \
    (int) (B), \
    (__v4sf)(__m128) (W), \
    (__mmask8) (U)))

#define _mm_maskz_roundscale_ps(U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
    (__v4sf)(__m128) (A), \
    (int) (B), \
    (__v4sf)(__m128) _mm_setzero_ps (), \
    (__mmask8) (U)))
12795
/* 128-bit roundscale_pd macros (plain, mask, maskz).  Each expands to
   __builtin_ia32_rndscalepd_128_mask with A as the __v2df source and B
   as the int immediate.  The plain form passes _mm_setzero_pd () and
   (__mmask8) -1, the mask form passes W and U, and the maskz form
   passes _mm_setzero_pd () and U.  */
#define _mm_roundscale_pd(A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
    (__v2df)(__m128d) (A), \
    (int) (B), \
    (__v2df)(__m128d) _mm_setzero_pd (), \
    (__mmask8) -1))

#define _mm_mask_roundscale_pd(W, U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
    (__v2df)(__m128d) (A), \
    (int) (B), \
    (__v2df)(__m128d) (W), \
    (__mmask8) (U)))

#define _mm_maskz_roundscale_pd(U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
    (__v2df)(__m128d) (A), \
    (int) (B), \
    (__v2df)(__m128d) _mm_setzero_pd (), \
    (__mmask8) (U)))
12807
/* 256-bit getmant_ps macros (plain, mask, maskz).  Each expands to
   __builtin_ia32_getmantps256_mask with X as the __v8sf source.  The
   int immediate is built from the two selector arguments as
   ((C) << 2) | (B).  The plain form passes _mm256_setzero_ps () and
   (__mmask8) -1, the mask form passes W and U, and the maskz form
   passes _mm256_setzero_ps () and U.  */
#define _mm256_getmant_ps(X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
    (__v8sf)(__m256) (X), \
    (int) (((C) << 2) | (B)), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8) -1))

#define _mm256_mask_getmant_ps(W, U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
    (__v8sf)(__m256) (X), \
    (int) (((C) << 2) | (B)), \
    (__v8sf)(__m256) (W), \
    (__mmask8) (U)))

#define _mm256_maskz_getmant_ps(U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
    (__v8sf)(__m256) (X), \
    (int) (((C) << 2) | (B)), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8) (U)))
12825
/* 128-bit getmant_ps macros (plain, mask, maskz).  Each expands to
   __builtin_ia32_getmantps128_mask with X as the __v4sf source.  The
   int immediate is built from the two selector arguments as
   ((C) << 2) | (B).  The plain form passes _mm_setzero_ps () and
   (__mmask8) -1, the mask form passes W and U, and the maskz form
   passes _mm_setzero_ps () and U.  */
#define _mm_getmant_ps(X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
    (__v4sf)(__m128) (X), \
    (int) (((C) << 2) | (B)), \
    (__v4sf)(__m128) _mm_setzero_ps (), \
    (__mmask8) -1))

#define _mm_mask_getmant_ps(W, U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
    (__v4sf)(__m128) (X), \
    (int) (((C) << 2) | (B)), \
    (__v4sf)(__m128) (W), \
    (__mmask8) (U)))

#define _mm_maskz_getmant_ps(U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
    (__v4sf)(__m128) (X), \
    (int) (((C) << 2) | (B)), \
    (__v4sf)(__m128) _mm_setzero_ps (), \
    (__mmask8) (U)))
12843
/* 256-bit getmant_pd macros (plain, mask, maskz).  Each expands to
   __builtin_ia32_getmantpd256_mask with X as the __v4df source.  The
   int immediate is built from the two selector arguments as
   ((C) << 2) | (B).  The plain form passes _mm256_setzero_pd () and
   (__mmask8) -1, the mask form passes W and U, and the maskz form
   passes _mm256_setzero_pd () and U.  */
#define _mm256_getmant_pd(X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
    (__v4df)(__m256d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v4df)(__m256d) _mm256_setzero_pd (), \
    (__mmask8) -1))

#define _mm256_mask_getmant_pd(W, U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
    (__v4df)(__m256d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v4df)(__m256d) (W), \
    (__mmask8) (U)))

#define _mm256_maskz_getmant_pd(U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
    (__v4df)(__m256d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v4df)(__m256d) _mm256_setzero_pd (), \
    (__mmask8) (U)))
12861
/* 128-bit getmant_pd macros (plain, mask, maskz).  Each expands to
   __builtin_ia32_getmantpd128_mask with X as the __v2df source.  The
   int immediate is built from the two selector arguments as
   ((C) << 2) | (B).  The plain form passes _mm_setzero_pd () and
   (__mmask8) -1, the mask form passes W and U, and the maskz form
   passes _mm_setzero_pd () and U.  */
#define _mm_getmant_pd(X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
    (__v2df)(__m128d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v2df)(__m128d) _mm_setzero_pd (), \
    (__mmask8) -1))

#define _mm_mask_getmant_pd(W, U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
    (__v2df)(__m128d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v2df)(__m128d) (W), \
    (__mmask8) (U)))

#define _mm_maskz_getmant_pd(U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
    (__v2df)(__m128d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v2df)(__m128d) _mm_setzero_pd (), \
    (__mmask8) (U)))
12879
/* Masked gathers of float/double elements using 32-bit indices.  Each
   macro expands to the matching __builtin_ia32_gather3siv* builtin,
   passing V1OLD as the first vector operand, ADDR as the base pointer,
   INDEX as the index vector, MASK as the __mmask8 operand and SCALE as
   the int operand.

   Every macro argument is parenthesized before it is cast so that a
   compound argument (e.g. a pointer-plus-offset ADDR, or MASK built
   with `|') is converted as a whole rather than only its first operand,
   and the whole expansion is parenthesized so it behaves as a single
   primary expression at the point of use.  */
#define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256) (V1OLD), \
                                          (float const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))

#define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128) (V1OLD), \
                                          (float const *) (ADDR), \
                                          (__v4si)(__m128i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))

#define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d) (V1OLD), \
                                           (double const *) (ADDR), \
                                           (__v4si)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d) (V1OLD), \
                                           (double const *) (ADDR), \
                                           (__v4si)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))
12903
/* Masked gathers of float/double elements using 64-bit indices.  Each
   macro expands to the matching __builtin_ia32_gather3div* builtin,
   passing V1OLD as the first vector operand, ADDR as the base pointer,
   INDEX as the index vector, MASK as the __mmask8 operand and SCALE as
   the int operand.  Note that both i64gather_ps forms take and yield a
   128-bit (__m128) float vector.

   Every macro argument is parenthesized before it is cast so that a
   compound argument is converted as a whole rather than only its first
   operand, and the whole expansion is parenthesized so it behaves as a
   single primary expression at the point of use.  */
#define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128) (V1OLD), \
                                          (float const *) (ADDR), \
                                          (__v4di)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))

#define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128) (V1OLD), \
                                          (float const *) (ADDR), \
                                          (__v2di)(__m128i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))

#define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d) (V1OLD), \
                                           (double const *) (ADDR), \
                                           (__v4di)(__m256i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d) (V1OLD), \
                                           (double const *) (ADDR), \
                                           (__v2di)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))
12927
/* Masked gathers of 32-bit/64-bit integer elements using 32-bit
   indices.  Each macro expands to the matching
   __builtin_ia32_gather3siv* builtin, passing V1OLD as the first vector
   operand, ADDR as the base pointer, INDEX as the index vector, MASK as
   the __mmask8 operand and SCALE as the int operand.

   Every macro argument is parenthesized before it is cast so that a
   compound argument is converted as a whole rather than only its first
   operand, and the whole expansion is parenthesized so it behaves as a
   single primary expression at the point of use.  */
#define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i) (V1OLD), \
                                           (int const *) (ADDR), \
                                           (__v8si)(__m256i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i) (V1OLD), \
                                           (int const *) (ADDR), \
                                           (__v4si)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i) (V1OLD), \
                                           (long long const *) (ADDR), \
                                           (__v4si)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i) (V1OLD), \
                                           (long long const *) (ADDR), \
                                           (__v4si)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))
12951
/* Masked gathers of 32-bit/64-bit integer elements using 64-bit
   indices.  Each macro expands to the matching
   __builtin_ia32_gather3div* builtin, passing V1OLD as the first vector
   operand, ADDR as the base pointer, INDEX as the index vector, MASK as
   the __mmask8 operand and SCALE as the int operand.  Note that both
   i64gather_epi32 forms take and yield a 128-bit (__m128i) vector.

   Every macro argument is parenthesized before it is cast so that a
   compound argument is converted as a whole rather than only its first
   operand, and the whole expansion is parenthesized so it behaves as a
   single primary expression at the point of use.  */
#define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i) (V1OLD), \
                                           (int const *) (ADDR), \
                                           (__v4di)(__m256i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i) (V1OLD), \
                                           (int const *) (ADDR), \
                                           (__v2di)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i) (V1OLD), \
                                           (long long const *) (ADDR), \
                                           (__v4di)(__m256i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i) (V1OLD), \
                                           (long long const *) (ADDR), \
                                           (__v2di)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))
12975
/* Scatter macros for float/double elements.  Each forwards to the
   __builtin_ia32_scatter{si,di}v* builtin of matching index/element
   width with operands (ADDR, mask, INDEX, V1, SCALE).  The forms
   without `mask' in their name pass the constant mask 0xFF; the mask_
   forms pass MASK.

   Every macro argument is parenthesized before it is cast so that an
   expression argument (e.g. `base + 4' for ADDR, `m1 | m2' for MASK)
   is converted as a whole.  */
#define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ((float *)(ADDR), (__mmask8)0xFF, \
                                (__v8si)(__m256i)(INDEX), \
                                (__v8sf)(__m256)(V1), (int)(SCALE))

#define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ((float *)(ADDR), (__mmask8)(MASK), \
                                (__v8si)(__m256i)(INDEX), \
                                (__v8sf)(__m256)(V1), (int)(SCALE))

#define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ((float *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ((float *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ((double *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4df)(__m256d)(V1), (int)(SCALE))

#define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ((double *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4df)(__m256d)(V1), (int)(SCALE))

#define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ((double *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v2df)(__m128d)(V1), (int)(SCALE))

#define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ((double *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v2df)(__m128d)(V1), (int)(SCALE))

#define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ((float *)(ADDR), (__mmask8)0xFF, \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ((float *)(ADDR), (__mmask8)(MASK), \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ((float *)(ADDR), (__mmask8)0xFF, \
                                (__v2di)(__m128i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ((float *)(ADDR), (__mmask8)(MASK), \
                                (__v2di)(__m128i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ((double *)(ADDR), (__mmask8)0xFF, \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4df)(__m256d)(V1), (int)(SCALE))

#define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ((double *)(ADDR), (__mmask8)(MASK), \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4df)(__m256d)(V1), (int)(SCALE))

#define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ((double *)(ADDR), (__mmask8)0xFF, \
                                (__v2di)(__m128i)(INDEX), \
                                (__v2df)(__m128d)(V1), (int)(SCALE))

#define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ((double *)(ADDR), (__mmask8)(MASK), \
                                (__v2di)(__m128i)(INDEX), \
                                (__v2df)(__m128d)(V1), (int)(SCALE))
13055
/* Scatter macros for 32-/64-bit integer elements.  Each forwards to
   the __builtin_ia32_scatter{si,di}v* builtin of matching index/element
   width with operands (ADDR, mask, INDEX, V1, SCALE).  The forms
   without `mask' in their name pass the constant mask 0xFF; the mask_
   forms pass MASK.

   Every macro argument is parenthesized before it is cast so that an
   expression argument (e.g. `base + 4' for ADDR, `m1 | m2' for MASK)
   is converted as a whole.  */
#define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ((int *)(ADDR), (__mmask8)0xFF, \
                                (__v8si)(__m256i)(INDEX), \
                                (__v8si)(__m256i)(V1), (int)(SCALE))

#define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ((int *)(ADDR), (__mmask8)(MASK), \
                                (__v8si)(__m256i)(INDEX), \
                                (__v8si)(__m256i)(V1), (int)(SCALE))

#define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ((int *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ((int *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ((long long *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ((long long *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ((long long *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v2di)(__m128i)(V1), (int)(SCALE))

#define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ((long long *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v2di)(__m128i)(V1), (int)(SCALE))

#define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ((int *)(ADDR), (__mmask8)0xFF, \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ((int *)(ADDR), (__mmask8)(MASK), \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ((int *)(ADDR), (__mmask8)0xFF, \
                                (__v2di)(__m128i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ((int *)(ADDR), (__mmask8)(MASK), \
                                (__v2di)(__m128i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ((long long *)(ADDR), (__mmask8)0xFF, \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ((long long *)(ADDR), (__mmask8)(MASK), \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ((long long *)(ADDR), (__mmask8)0xFF, \
                                (__v2di)(__m128i)(INDEX), \
                                (__v2di)(__m128i)(V1), (int)(SCALE))

#define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ((long long *)(ADDR), (__mmask8)(MASK), \
                                (__v2di)(__m128i)(INDEX), \
                                (__v2di)(__m128i)(V1), (int)(SCALE))
13135
/* Macro forms of the masked 32-bit shuffle intrinsics.  Both widths
   forward to __builtin_ia32_pshufd{256,128}_mask with operands
   (X, C, <third operand>, U): the mask_ variants pass W as the third
   operand, the maskz_ variants pass an all-zero vector.  */
#define _mm256_mask_shuffle_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \
      (__v8si)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_shuffle_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \
      (__v8si)(__m256i)_mm256_setzero_si256(), (__mmask8)(U)))

#define _mm_mask_shuffle_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \
      (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_shuffle_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \
      (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
13155
/* Macro forms of the 64-bit rol/ror intrinsics.  The rol macros expand
   to __builtin_ia32_prolq{256,128}_mask and the ror macros to
   __builtin_ia32_prorq{256,128}_mask, always with operands
   (A, B, <third operand>, <mask>):
     - plain form:  zero vector, mask (__mmask8)-1;
     - mask_ form:  W, mask U;
     - maskz_ form: zero vector, mask U.
   The 128-bit forms take their zero vector from _mm_setzero_di.  */
#define _mm256_rol_epi64(A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)-1))

#define _mm256_mask_rol_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_rol_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm_rol_epi64(A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)_mm_setzero_di(), (__mmask8)-1))

#define _mm_mask_rol_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_rol_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)_mm_setzero_di(), (__mmask8)(U)))

#define _mm256_ror_epi64(A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)-1))

#define _mm256_mask_ror_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_ror_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm_ror_epi64(A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)_mm_setzero_di(), (__mmask8)-1))

#define _mm_mask_ror_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_ror_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)_mm_setzero_di(), (__mmask8)(U)))
13215
/* Macro forms of the 32-bit rol/ror intrinsics.  The rol macros expand
   to __builtin_ia32_prold{256,128}_mask and the ror macros to
   __builtin_ia32_prord{256,128}_mask, always with operands
   (A, B, <third operand>, <mask>):
     - plain form:  zero vector, mask (__mmask8)-1;
     - mask_ form:  W, mask U;
     - maskz_ form: zero vector, mask U.  */
#define _mm256_rol_epi32(A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256(), (__mmask8)-1))

#define _mm256_mask_rol_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_rol_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256(), (__mmask8)(U)))

#define _mm_rol_epi32(A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))

#define _mm_mask_rol_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_rol_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))

#define _mm256_ror_epi32(A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256(), (__mmask8)-1))

#define _mm256_mask_ror_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_ror_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256(), (__mmask8)(U)))

#define _mm_ror_epi32(A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))

#define _mm_mask_ror_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_ror_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
13275
/* Macro forms of the 256-bit alignr intrinsics; all forward to
   __builtin_ia32_align{d,q}256_mask with operands
   (X, Y, C, <fourth operand>, <mask>).  The mask_ forms pass W and U,
   the maskz_ forms pass a zero vector and U.

   The unmasked forms pass the mask (__mmask8)-1 and supply a zero
   vector as the fourth operand rather than a second copy of X, so
   that X is expanded exactly once and an argument with side effects
   is not evaluated twice.  NOTE(review): this relies on the fourth
   operand being unused when every mask bit is set (the usual AVX-512
   write-mask behaviour) -- confirm against the builtin's definition.  */
#define _mm256_alignr_epi32(X, Y, C) \
    ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
        (__v8si)(__m256i)(Y), (int)(C), \
        (__v8si)(__m256i)_mm256_setzero_si256 (), (__mmask8)-1))

#define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
    ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
        (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_alignr_epi32(U, X, Y, C) \
    ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
        (__v8si)(__m256i)(Y), (int)(C), \
        (__v8si)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm256_alignr_epi64(X, Y, C) \
    ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
        (__v4di)(__m256i)(Y), (int)(C), \
        (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)-1))

#define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
    ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
        (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_alignr_epi64(U, X, Y, C) \
    ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
        (__v4di)(__m256i)(Y), (int)(C), \
        (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))
13301
/* Macro forms of the 128-bit, 32-bit-element alignr intrinsics; all
   forward to __builtin_ia32_alignd128_mask with operands
   (X, Y, C, <fourth operand>, <mask>).  The mask_ form passes W and U,
   the maskz_ form passes a zero vector and U.

   The unmasked form passes the mask (__mmask8)-1 and supplies a zero
   vector as the fourth operand rather than a second copy of X, so
   that X is expanded exactly once and an argument with side effects
   is not evaluated twice.  NOTE(review): this relies on the fourth
   operand being unused when every mask bit is set -- confirm against
   the builtin's definition.  */
#define _mm_alignr_epi32(X, Y, C) \
    ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
        (__v4si)(__m128i)(Y), (int)(C), \
        (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_alignr_epi32(W, U, X, Y, C) \
    ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
        (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_alignr_epi32(U, X, Y, C) \
    ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
        (__v4si)(__m128i)(Y), (int)(C), \
        (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)(U)))
13314
/* Macro forms of the 128-bit, 64-bit-element alignr intrinsics; all
   forward to __builtin_ia32_alignq128_mask with operands
   (X, Y, C, <fourth operand>, <mask>).

   - _mm_alignr_epi64 passes the mask (__mmask8)-1 and a zero vector as
     the fourth operand (not a second copy of X, so X is expanded only
     once).  NOTE(review): this relies on the fourth operand being
     unused when every mask bit is set -- confirm against the builtin's
     definition.
   - _mm_mask_alignr_epi64 passes W and U.  It must not hard-code X and
     an all-ones mask, which would make it ignore its W and U arguments
     and behave like the unmasked form.
   - _mm_maskz_alignr_epi64 passes a zero vector and U.  */
#define _mm_alignr_epi64(X, Y, C) \
    ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
        (__v2di)(__m128i)(Y), (int)(C), \
        (__v2di)(__m128i)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_alignr_epi64(W, U, X, Y, C) \
    ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
        (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_alignr_epi64(U, X, Y, C) \
    ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
        (__v2di)(__m128i)(Y), (int)(C), \
        (__v2di)(__m128i)_mm_setzero_si128(), (__mmask8)(U)))
13327
/* Macro forms of the masked cvtps_ph intrinsics.  They forward to
   __builtin_ia32_vcvtps2ph{,256}_mask with operands
   (A, I, <third operand>, U): the mask_ forms pass W, the maskz_ forms
   pass the vector returned by _mm_setzero_hi.  The result is a 128-bit
   vector for both source widths.

   A is parenthesized before it is cast so that an expression argument
   (e.g. `c ? a : b') is converted as a whole.  */
#define _mm_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), (int) (I), \
      (__v8hi)(__m128i) (W), (__mmask8) (U)))

#define _mm_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), (int) (I), \
      (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))

#define _mm256_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I), \
      (__v8hi)(__m128i) (W), (__mmask8) (U)))

#define _mm256_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I), \
      (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
13343
/* Macro forms of the srai intrinsics.  The epi32 macros expand to
   __builtin_ia32_psradi{256,128}_mask and the epi64 macros to
   __builtin_ia32_psraqi{256,128}_mask, with operands
   (A, B, <third operand>, <mask>):
     - plain form (epi64 only here): zero vector, mask (__mmask8)-1;
     - mask_ form:  W, mask U;
     - maskz_ form: zero vector, mask U.  */
#define _mm256_mask_srai_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_srai_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)_mm256_setzero_si256(), (__mmask8)(U)))

#define _mm_mask_srai_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_srai_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)_mm_setzero_si128(), (__mmask8)(U)))

#define _mm256_srai_epi64(A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)_mm256_setzero_si256 (), (__mmask8)-1))

#define _mm256_mask_srai_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_srai_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm_srai_epi64(A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_srai_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_srai_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
13383
/* Macro forms of the masked permutex/permute intrinsics for float and
   double vectors.  Each forwards to the builtin named in its body with
   operands (source, immediate, <third operand>, U): the mask_ forms
   pass W as the third operand, the maskz_ forms pass a zero vector of
   the result type.  */
#define _mm256_mask_permutex_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), (int)(B), \
      (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm256_maskz_permutex_pd(U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), (int)(B), \
      (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))

#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
      (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
      (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))

#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \
      (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \
      (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)(U)))

#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \
      (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \
      (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)(U)))

#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \
      (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \
      (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)(U)))
13428
/* Macro forms of the mask_blend intrinsics.  The macro takes its
   arguments as (__U, __A, __W) and hands them to the matching
   __builtin_ia32_blendm*_{256,128}_mask builtin in the order
   (__A, __W, __U), casting the two vectors to the builtin's element
   vector type and __U to __mmask8.  */
#define _mm256_mask_blend_pd(__U, __A, __W) \
  ((__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) (__A), \
      (__v4df) (__W), (__mmask8) (__U)))

#define _mm256_mask_blend_ps(__U, __A, __W) \
  ((__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) (__A), \
      (__v8sf) (__W), (__mmask8) (__U)))

#define _mm256_mask_blend_epi64(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) (__A), \
      (__v4di) (__W), (__mmask8) (__U)))

#define _mm256_mask_blend_epi32(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) (__A), \
      (__v8si) (__W), (__mmask8) (__U)))

#define _mm_mask_blend_pd(__U, __A, __W) \
  ((__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) (__A), \
      (__v2df) (__W), (__mmask8) (__U)))

#define _mm_mask_blend_ps(__U, __A, __W) \
  ((__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) (__A), \
      (__v4sf) (__W), (__mmask8) (__U)))

#define _mm_mask_blend_epi64(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) (__A), \
      (__v2di) (__W), (__mmask8) (__U)))

#define _mm_mask_blend_epi32(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) (__A), \
      (__v4si) (__W), (__mmask8) (__U)))
13468
/* Macro forms of the 256-bit compare-into-mask intrinsics.  Each
   forwards to the __builtin_ia32_{,u}cmp*256_mask builtin named in its
   body with operands (X, Y, P, <mask>) and casts the result to
   __mmask8.  The plain forms pass the mask (__mmask8)-1; the mask_
   forms pass M.  */
#define _mm256_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(P), (__mmask8)-1))

#define _mm256_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(P), (__mmask8)-1))

#define _mm256_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(P), (__mmask8)-1))

#define _mm256_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(P), (__mmask8)-1))

#define _mm256_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \
      (__v4df)(__m256d)(Y), (int)(P), (__mmask8)-1))

#define _mm256_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \
      (__v8sf)(__m256)(Y), (int)(P), (__mmask8)-1))

#define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(P), (__mmask8)(M)))

#define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(P), (__mmask8)(M)))

#define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(P), (__mmask8)(M)))

#define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(P), (__mmask8)(M)))

#define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \
      (__v4df)(__m256d)(Y), (int)(P), (__mmask8)(M)))

#define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \
      (__v8sf)(__m256)(Y), (int)(P), (__mmask8)(M)))
13528
/* Macro forms of the 128-bit compare-into-mask intrinsics.  Each
   forwards to the __builtin_ia32_{,u}cmp*128_mask builtin named in its
   body with operands (X, Y, P, <mask>) and casts the result to
   __mmask8.  The plain forms pass the mask (__mmask8)-1; the mask_
   forms pass M.  */
#define _mm_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(P), (__mmask8)-1))

#define _mm_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(P), (__mmask8)-1))

#define _mm_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(P), (__mmask8)-1))

#define _mm_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(P), (__mmask8)-1))

#define _mm_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), (int)(P), (__mmask8)-1))

#define _mm_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), (int)(P), (__mmask8)-1))

#define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(P), (__mmask8)(M)))

#define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(P), (__mmask8)(M)))

#define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(P), (__mmask8)(M)))

#define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(P), (__mmask8)(M)))

#define _mm_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), (int)(P), (__mmask8)(M)))

#define _mm_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), (int)(P), (__mmask8)(M)))
13588
13589#endif
13590
/* Alias macro: _mm256_permutexvar_ps (A, B) expands to
   _mm256_permutevar8x32_ps with the two operands swapped, i.e.
   _mm256_permutevar8x32_ps ((B), (A)).  */
#define _mm256_permutexvar_ps(A, B) \
  _mm256_permutevar8x32_ps((B), (A))
13592
13593#ifdef __DISABLE_AVX512VL__
13594#undef __DISABLE_AVX512VL__
13595#pragma GCC pop_options
13596#endif /* __DISABLE_AVX512VL__ */
13597
13598#endif /* _AVX512VLINTRIN_H_INCLUDED */
13599